信噪比分析之数据可视化的必要性

使用编程的方式能够高度自定义地对大量数据进行分析，但为保证分析的准确性，需要对中间步骤进行充分的可视化检查。

由于实际采集的数据通常比较“脏”（即存在你没有考虑到的情况），所以如果只基于少数抽样样本来设计算法并执行分析程序，大概率会得到错误的结果。如果分析结果看起来奇怪还好，这会倒逼你回过头去逐步检查。最怕的是直接得出一个符合预期、实际上却是错误的结果，因为这时你根本不会想到要回头检查。这里记录一个分析信噪比数据的案例。

1
from roifile import roiread
2
from aicsimageio import AICSImage
3
from skimage.draw import polygon
4
import os
5

6

7
import matplotlib.pyplot as plt
8
import pandas as pd
9
import numpy as np
10

11

12

13

14
def getFileList(wks, ext):
    """Recursively collect the files under ``wks`` whose name ends with ``ext``.

    Args:
        wks: Root directory to walk.
        ext: File-name suffix to match. This is a plain ``str.endswith``
            test, so no leading dot is implied (``'czi'`` also matches a
            file named ``'fooczi'``).

    Returns:
        A list of paths, each the walked directory joined with the file
        name. The order is deterministic: files of a directory in sorted
        order, followed by its sub-directories in sorted order.
    """
    flist = []
    for root, ds, fs in os.walk(wks):
        # Sorting ``ds`` in place fixes the order in which os.walk descends,
        # so the result no longer depends on the file system's listing order
        # (callers derive replicate ids from the position in this list).
        ds.sort()
        for fname in sorted(fs):
            if fname.endswith(ext):
                flist.append(os.path.join(root, fname))
    return flist
23

24

25
def get_values(img, roi):
    """Return the pixel values of ``img`` that lie inside the polygon of ``roi``.

    Args:
        img: Image array indexed as ``img[row, col]``.
        roi: Object whose ``coordinates()`` returns an ``(N, 2)`` array of
            vertices; column 0 is used as x (image column) and column 1 as
            y (image row).

    Returns:
        A flattened array with the values of every pixel covered by the
        polygon, restricted to pixels that lie inside the image.
    """
    coords = roi.coordinates()
    xs = coords[:, 0]
    ys = coords[:, 1]
    # Passing ``shape`` clips the rasterized polygon to the image extent.
    # Without it, a vertex outside the frame raises IndexError, and negative
    # coordinates silently wrap around to the opposite image border.
    rows, cols = polygon(ys, xs, shape=img.shape)
    return img[rows, cols].flatten()
33

34

35
def calculateSNR(a:list, b:list)->dict:
    """Compute the signal-to-background ratio and its propagated spread.

    Args:
        a: Pixel values of the signal region (signal + background intensity).
        b: Pixel values of the background-only region.

    Returns:
        A dict with the keys ``MEAN_SIG+BACK``, ``SD_SIG+BACK``,
        ``MEAN_BACK``, ``SD_BACK``, ``MEAN_SIG`` (mean of ``a`` minus mean of
        ``b``), ``SD_SIG`` (the two SDs added in quadrature), ``SNR``
        (``MEAN_SIG / MEAN_BACK``) and ``SD_SNR``.

    Notes:
        ``SD_SNR`` is algebraically equal to
        ``SNR * sqrt((SD_SIG/MEAN_SIG)**2 + (SD_BACK/MEAN_BACK)**2)`` but is
        evaluated without dividing by ``MEAN_SIG``. It therefore stays
        finite when the signal equals the background and is never negative
        when the signal is below the background.
        A zero background mean still yields inf/nan through the division by
        ``MEAN_BACK``; empty inputs yield nan via ``np.mean``.
    """
    sig_bk = np.mean(a)
    bk = np.mean(b)
    sig = sig_bk - bk

    sig_bk_std = np.std(a)
    bk_std = np.std(b)
    # Spread of a difference of two quantities: add the SDs in quadrature.
    sig_std = np.sqrt(sig_bk_std ** 2 + bk_std ** 2)

    SNR = sig / bk
    # Same quotient error propagation as before, rearranged so that neither
    # the sign nor a zero value of ``sig`` can corrupt the result.
    SNR_std = np.sqrt(sig_std ** 2 + (SNR * bk_std) ** 2) / abs(bk)

    res = {
        "MEAN_SIG+BACK": sig_bk,
        "SD_SIG+BACK": sig_bk_std,
        "MEAN_BACK": bk,
        "SD_BACK": bk_std,
        "MEAN_SIG": sig,
        "SD_SIG": sig_std,
        "SNR": SNR,
        "SD_SNR": SNR_std,
    }

    return res

上述代码就包含了计算信噪比的关键函数 calculateSNR，该方法来源于文献，需要同时输入信号的像素值（相机采集的信号区域，通常包含了背景），以及纯背景区域的像素值。然后采取均值相减的方式（sig_bk - bk = sig）获得实际的信号强度 sig，再除以背景（噪声）均值 bk 得到信噪比 SNR。需要注意的是该方法默认噪声和信号的像素值都服从正态分布，所以选取ROI对信号和噪声的像素值进行采样时，注意避免人为偏误。

理论上上述代码能够完成SNR分析，但用户希望利用这个方法来实现细胞膜区域的一种特殊SNR分析，信号是他的方法增强的，而背景则是未使用该方法的对照组细胞膜区域的背景信号强度（存在细胞自发荧光的考虑）。这种基于指定object region从不同图像中采集sig和bk 的SNR分析也是可行的，出于描述简便，这种SNR分析本文简称为 cross-image SNR，然而因为数据量比较大，数据一致性就难以保证，所以就不得不进行必要的可视化。

[scode type="yellow"]cross-image SNR分析的前提是成像参数都能保持一致，甚至要求拍摄时间都尽可能靠近，避免仪器状态的变化造成的影响[/scode]

1
# Collect, for every .czi file, the per-channel frame, the ROI pixel values
# and the metadata needed for later visual inspection.
# NOTE: ``channel_alt`` (raw channel name -> canonical label) must already
# be defined when this loop runs.
fps = getFileList(wks='./', ext='czi')

df = {}

for idx, fp in enumerate(fps):
    # Path layout: ./<n-plex dir>/<cell type>/<group>/<file>.czi
    # normpath + os.sep makes the split work on Windows and POSIX alike
    # (a hard-coded "\\" only splits Windows-style paths).
    pp = os.path.normpath(fp).split(os.sep)
    n_channel = pp[0][0]  # first character of the top-level dir: number of colors
    cell = pp[1]          # cell type
    etype = pp[2]         # control or experimental group
    # Replicate id alternates 1, 2 with the file index.
    # NOTE(review): assumes exactly two replicates per condition, listed
    # consecutively by getFileList -- confirm against the data layout.
    rid = idx % 2 + 1
    fp2 = fp.replace(".czi", ".rois.zip")
    rois = roiread(fp2)
    img = AICSImage(fp)
    pixelsize = img.physical_pixel_sizes.X
    channels = img.channel_names
    for idy, ch in enumerate(channels):
        ch_ = channel_alt[ch]
        # Create the nested levels on demand: colors -> cell -> group ->
        # replicate -> channel.
        entry = (
            df.setdefault(n_channel, {})
            .setdefault(cell, {})
            .setdefault(etype, {})
            .setdefault(rid, {})
            .setdefault(ch_, {})
        )

        frame = img.get_image_data("YX", C=idy)
        bag = []   # pixels from ROIs with roitype == 1 (stored as 'intensity')
        bbag = []  # pixels from ROIs with roitype == 2 (stored as 'background')
        for roi in rois:
            if roi.roitype == 1:
                target = bag
            elif roi.roitype == 2:
                target = bbag
            else:
                # Other ROI types are not used, so skip rasterizing them.
                continue
            target.extend(get_values(frame, roi))
        entry['intensity'] = bag
        entry['background'] = bbag
        entry['rois'] = rois
        entry['img'] = frame
        entry['fp'] = fp
        entry['pixelsize'] = pixelsize

为了便于可视化检查，那么从原始数据开始收集和提取的信息就要尽可能保持完整，而且要遵循一定的逻辑，如上述代码所示，我主要使用字典的方式来装载包含了不同颜色数量，不同细胞，不同实验组，重复组次数，以及成像通道的数据。然后每个数据是xy的frame，并且提取指定ROI（细胞膜区域）的像素，还保留了相关的各种信息，例如文件路径，像素尺寸，roi记录等等，都是为了方便可视化检查。

1
import matplotlib as mpl
2
import matplotlib.colors as mcolors
3
# Global figure font; presumably chosen so that the Chinese group labels
# render -- confirm the font is installed on the target machine.
mpl.rcParams['font.family'] = 'DengXian'
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
5

6

7
def create_basic_colormap(color_name, num_colors=256):
    """Build a linear colormap running from black to the named base color.

    Args:
        color_name: One of ``'red'``, ``'green'``, ``'blue'``, ``'yellow'``,
            ``'cyan'``, ``'purple'`` or ``'gray'``.
        num_colors: Number of evenly spaced brightness steps used to define
            the gradient; must be at least 2.

    Returns:
        A ``LinearSegmentedColormap`` named ``"<color_name>_gradient"``.

    Raises:
        ValueError: If ``color_name`` is unknown or ``num_colors`` < 2.
    """
    # RGB triple of each base color at full brightness.
    bcmap = {
        'red': (1, 0, 0),
        'green': (0, 1, 0),
        'blue': (0, 0, 1),
        'yellow': (1, 1, 0),
        'cyan': (0, 1, 1),
        'purple': (1, 0, 1),
        'gray': (1, 1, 1)
    }
    # Explicit checks instead of ``assert``: asserts vanish under ``python -O``.
    if color_name not in bcmap:
        raise ValueError(
            f"unknown color_name {color_name!r}; expected one of {sorted(bcmap)}"
        )
    if num_colors < 2:
        # A single step would divide by zero when computing the brightness.
        raise ValueError("num_colors must be at least 2")

    base = bcmap[color_name]
    colors = []
    for i in range(num_colors):
        # Brightness goes from 0 to 1; scaling every component by it keeps
        # the hue and only changes the intensity.
        brightness = i / (num_colors - 1)
        colors.append(tuple(component * brightness for component in base))
    # Create the linear segmented colormap from the gradient stops.
    cmap = mcolors.LinearSegmentedColormap.from_list(f"{color_name}_gradient", colors)
    return cmap
32

33

34
# Maps the raw channel names found in the acquisition files to one canonical
# label per channel, so that batches with inconsistent naming can be
# processed together.
channel_alt = {
    'ESID-T1': "BF",
    'ESID-T2': "BF",
    'AF488-T1': "488",
    "AF488-T2": "488",
    "AF594-T2": "568",
    "AF568-T1": "568",
    "AF610-T3": "647",
    "AF647-T3": "647",
}
44

45

46

47

48
# Pseudo-color lookup table for each canonical channel label.
luts = {
    'BF': create_basic_colormap('gray'),
    '488': create_basic_colormap('green'),
    '568':  create_basic_colormap('yellow'),
    '647': create_basic_colormap('red'),
}
54

55

56
def _draw_group(ax_fluo, ax_bf, group, channel, title, cmap, vmin, vmax):
    """Show one group's fluorescence and bright-field frames with ROI outlines."""
    # ROI type -> outline color; these are the two types collected as
    # 'intensity' (1) and 'background' (2).
    edge_colors = {1: 'red', 2: 'blue'}

    ax_fluo.imshow(group[channel]['img'], cmap=cmap, vmin=vmin, vmax=vmax)
    ax_fluo.set_title(title+f"\npixelsize: {group[channel]['pixelsize']:.3f} μm\n"+group[channel]['fp'])
    ax_bf.imshow(group['BF']['img'], cmap=luts['BF'])
    for roi in group[channel]['rois']:
        edge = edge_colors.get(roi.roitype)
        if edge is None:
            # Any other ROI type has no assigned color: skip it instead of
            # failing on an unbound variable or reusing the previous color.
            continue
        pts = roi.coordinates()
        for axis in (ax_fluo, ax_bf):
            axis.fill(pts[:,0], pts[:,1], facecolor='none', edgecolor=edge, linewidth=1)


def _plot_histograms(axis, group, channel):
    """Overlay the intensity and background pixel-value histograms of one group."""
    axis.hist(group[channel]["intensity"], alpha=0.5, fc='red', ec='white', label='intensity', bins=30)
    axis.hist(group[channel]["background"], alpha=0.5, fc='blue', ec='white', label='background', bins=30)
    axis.set_xlabel("Pixel Value")
    axis.set_ylabel("Pixel Count")
    axis.legend()


def analyzeSNR(df, n_plex, cell_type, channel, replicate, vmin=0, vmax=2000):
    """Compute the per-image SNR of both groups and display images and ROIs.

    For the '实验组' and 'mock' entries of the selected condition this prints
    both SNR values, shows a 2x2 figure (selected channel on top, 'BF'
    channel below, ROI outlines on both) and a second figure with the
    pixel-value histograms of the intensity and background samples.

    Args:
        df: Nested dict indexed as
            ``df[n_plex][cell_type][group][replicate][channel]``.
        n_plex: First-level key, e.g. ``'2'``.
        cell_type: Second-level key, e.g. ``'97h'``.
        channel: Channel label, e.g. ``'488'``; must also be a key of ``luts``.
        replicate: Replicate id, e.g. ``2``.
        vmin: Lower display limit of the channel image (default 0).
        vmax: Upper display limit of the channel image (default 2000).

    Returns:
        ``{'实验组': <SNR of the experimental group>, 'mock': <SNR of the mock group>}``.
    """
    cmap = luts[channel]
    a = df[n_plex][cell_type]['实验组'][replicate]
    b = df[n_plex][cell_type]['mock'][replicate]
    res_a = calculateSNR(a[channel]['intensity'], a[channel]['background'])
    res_b = calculateSNR(b[channel]['intensity'], b[channel]['background'])
    print(f"实验组 SNR: {res_a['SNR']:.2f}, Mock组 SNR: {res_b['SNR']:.2f}")

    # Left column: experimental group, right column: mock group.
    fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(12,12))
    _draw_group(ax[0,0], ax[1,0], a, channel, "Exp", cmap, vmin, vmax)
    _draw_group(ax[0,1], ax[1,1], b, channel, "Mock", cmap, vmin, vmax)
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(12,3))
    ax = ax.ravel()
    _plot_histograms(ax[0], a, channel)
    _plot_histograms(ax[1], b, channel)
    plt.tight_layout()
    plt.show()

    res = {
        '实验组': res_a['SNR'],
        'mock': res_b['SNR']
    }
    return res

可视化检查分析的代码就会变得非常复杂。如上述代码所示，为了使用合适伪彩显示细胞图像，还得先自定义一段代码（后面可以封装起来，作为自己常用的代码模块进行 import）；然后用户原始数据中channel name 比较乱，经确认是同一个channel，所以还写了一个 channel_alt 的字典方便做规范化，以便后面批量处理。同时再通过一个 luts 的字典，方便自动给不同的 channel 上合适的伪彩。

由于数据一致性无法保证，所以在 analyzeSNR 函数中，我只能对每张图自身的信噪比进行分析，这种可以称之为 local SNR。每次调用 analyzeSNR，都会对指定条件和细胞的实验组和对照组数据分别计算 local SNR，然后显示细胞指定通道荧光图像和明场图像，sig 和 bk 的ROI区域，原始文件路径，pixelsize等等信息，方便检查。

1
# Select one condition and run the SNR analysis together with its visual check.
n_plex = '2'
cell = '97h'
channel = '488'
replicate = 1
print(f"====== {n_plex}-plex, cell: {cell}, channel: {channel}, replicate: {replicate} ======")
res = analyzeSNR(df, n_plex, cell, channel, replicate, vmin=0, vmax=5000)

封装好函数之后，具体的调用如上述代码所示，结果如下图：

总之，数据可视化是保证数据分析结果准确性的必要检查手段。