使用Grok编程真的太香了

之前想写一个函数，用来计算三维标签图像（mask）中每个物体到其最邻近物体的间隙距离，并进行可视化。发现Grok提供的代码最简洁，可视化效果最好。这里具体记录一下这次 99% 的工作量依靠 AI 完成的编程过程。

生成代码用到的prompt详见👆。使用 Grok 3 得到的第一版代码如下，我仅修改了测试数据，并打印执行耗时。

1
import numpy as np
2
import pandas as pd
3
from skimage import io, measure
4
from scipy.spatial import distance
5
from scipy.ndimage import distance_transform_edt
6
import plotly.graph_objects as go
7
from itertools import combinations
8
import time
9

10

11
def compute_nearest_distances(mask_path):
    """Find, for every labelled object in a 3D mask, its nearest neighbour.

    The mask is read with ``skimage.io.imread``. Voxels equal to 0 are
    treated as background; every other value identifies one object. For each
    pair of objects the boundary voxels of both are extracted and the closest
    pair of boundary voxels is found by brute force with
    ``scipy.spatial.distance.cdist``.

    Args:
        mask_path: Path of the label image to read. The array must be 3D,
            because voxel indices are unpacked as ``(z, y, x)``.

    Returns:
        A tuple ``(df, mask, label_to_prop)``:

        * ``df`` - one row per label, holding the pair with the smallest gap.
          Columns: ``label, xc, yc, zc`` (object and its centroid), ``d``
          (gap), ``label2, xc2, yc2, zc2`` (nearest object and its centroid),
          ``xe1, ye1, ze1`` / ``xe2, ye2, ze2`` (closest boundary voxels on
          the object and on its neighbour).
        * ``mask`` - the loaded label array.
        * ``label_to_prop`` - dict mapping each label to its ``regionprops``
          entry.
    """
    mask = io.imread(mask_path)

    # Drop the background by value, not by position: slicing off the first
    # unique value would discard a real object when the mask contains no 0.
    labels = np.unique(mask)
    labels = labels[labels != 0]

    columns = [
        'label', 'xc', 'yc', 'zc',
        'd', 'label2', 'xc2', 'yc2', 'zc2',
        'xe1', 'ye1', 'ze1', 'xe2', 'ye2', 'ze2',
    ]
    data = {name: [] for name in columns}

    props = measure.regionprops(mask)
    label_to_prop = {prop.label: prop for prop in props if prop.label > 0}

    def get_boundary_points(label_mask):
        """Return the (z, y, x) index arrays of the object's boundary voxels."""
        binary = (label_mask > 0).astype(np.uint8)
        dist = distance_transform_edt(binary)
        # Object voxels whose distance to the background is <= 1 touch it.
        boundary = (dist <= 1) & binary
        return np.where(boundary)

    def add_row(label_a, centroid_a, point_a, label_b, centroid_b, point_b, gap):
        """Append one directed record (object a -> object b) to ``data``."""
        # Centroids are indexed (z, y, x); boundary points are (x, y, z).
        values = (
            label_a, centroid_a[2], centroid_a[1], centroid_a[0],
            gap, label_b, centroid_b[2], centroid_b[1], centroid_b[0],
            point_a[0], point_a[1], point_a[2],
            point_b[0], point_b[1], point_b[2],
        )
        for name, value in zip(columns, values):
            data[name].append(value)

    # NOTE: the boundary of each object is re-extracted for every pair it
    # takes part in; together with the dense distance matrix this dominates
    # the run time.
    for label1, label2 in combinations(labels, 2):
        z1, y1, x1 = get_boundary_points(mask == label1)
        coords1 = np.column_stack((x1, y1, z1))
        z2, y2, x2 = get_boundary_points(mask == label2)
        coords2 = np.column_stack((x2, y2, z2))

        # Brute-force search for the closest pair of boundary voxels.
        dist_matrix = distance.cdist(coords1, coords2, 'euclidean')
        idx1, idx2 = np.unravel_index(dist_matrix.argmin(), dist_matrix.shape)
        min_dist = dist_matrix[idx1, idx2]
        point1 = coords1[idx1]
        point2 = coords2[idx2]

        centroid1 = label_to_prop[label1].centroid  # (z, y, x)
        centroid2 = label_to_prop[label2].centroid

        # Store the pair in both directions so each label gets candidates.
        add_row(label1, centroid1, point1, label2, centroid2, point2, min_dist)
        add_row(label2, centroid2, point2, label1, centroid1, point1, min_dist)

    df = pd.DataFrame(data)

    # Keep, for each label, only the neighbour with the smallest gap.
    df = df.loc[df.groupby('label')['d'].idxmin()].reset_index(drop=True)

    return df, mask, label_to_prop
126

127

128
def visualize_nearest_distances(df, mask, label_to_prop):
    """Build a Plotly 3D figure of every object and its nearest-gap segment.

    For each row of ``df`` three traces are added: a semi-transparent surface
    of the object (marching cubes on its binary mask), a text marker at the
    object's centroid, and a black segment joining the two closest boundary
    voxels.

    Args:
        df: Table with the columns ``label``, ``label2``, ``xe1, ye1, ze1``
            and ``xe2, ye2, ze2``, one row per object.
        mask: Label volume; voxels equal to a row's ``label`` form the object.
        label_to_prop: Mapping from label to an object exposing ``centroid``
            indexed as ``(z, y, x)``.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    # Palette is cycled when there are more objects than colours.
    colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']

    # Iterate the rows directly instead of re-filtering the frame per label.
    for idx, row in enumerate(df.itertuples(index=False)):
        # Labels can arrive as floats; use ints so trace names read
        # "Distance 1-2" instead of "Distance 1-2.0".
        label = int(row.label)
        neighbour = int(row.label2)

        binary_mask = (mask == label).astype(np.uint8)

        # Surface of the object; vertices come back in (z, y, x) order.
        verts, faces, _, _ = measure.marching_cubes(binary_mask, level=0.5)
        fig.add_trace(go.Mesh3d(
            x=verts[:, 2], y=verts[:, 1], z=verts[:, 0],
            i=faces[:, 0], j=faces[:, 1], k=faces[:, 2],
            color=colors[idx % len(colors)],
            opacity=0.5,
            name=f'Object {label}'
        ))

        # Label text at the centroid.
        centroid = label_to_prop[label].centroid
        fig.add_trace(go.Scatter3d(
            x=[centroid[2]], y=[centroid[1]], z=[centroid[0]],
            mode='text',
            text=[f'Label {label}'],
            textposition='middle center',
            showlegend=False
        ))

        # Segment between the closest boundary voxels of the two objects.
        fig.add_trace(go.Scatter3d(
            x=[row.xe1, row.xe2],
            y=[row.ye1, row.ye2],
            z=[row.ze1, row.ze2],
            mode='lines',
            line=dict(color='black', width=5),
            name=f'Distance {label}-{neighbour}'
        ))

    fig.update_layout(
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data'
        ),
        title='3D Objects with Nearest Neighbor Distances'
    )

    return fig
193

194

195
# Example usage
196
if __name__ == "__main__":
    # Path to your mask.tif file
    mask_path = 'test-mask.tif'

    # perf_counter is a monotonic clock, so the elapsed time cannot be
    # distorted by system clock adjustments.
    t0 = time.perf_counter()
    # Compute distances and get DataFrame
    df, mask, label_to_prop = compute_nearest_distances(mask_path)
    print(f"Time usage: {time.perf_counter()-t0:.2f} s.")    # Time usage: 86.95 s.
    # Print DataFrame
    # print(df)
    df.to_csv("test-mask-gap-distance-results.csv", index=False)

    # Create and show visualization
    fig = visualize_nearest_distances(df, mask, label_to_prop)
    fig.show()

这段代码在我的机器（AMD R9 7900X）中执行耗时 86.9秒，这个有点难以接受，因为 test-mask.tif 的shape是(9,100,100)，而我真正待分析的数据是 (9, 2560, 2560)，所以必须要优化性能的。

然后我就继续提交prompt:

1
如何优化下面这段代码以提升性能：
2

3
...(code)...

Grok很快就更新了一版代码，我再复制下来尝试运行，发现报错。但是没关系，我把报错直接复制粘贴反馈给Grok，不需要任何添油加醋。如：

TypeError: regionprops() got an unexpected keyword argument 'properties'

joblib.externals.loky.process_executor.BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.

解决上述两个报错后，代码就能跑了。使用测试数据测性能，耗时仅需 3.37 秒。更新后的代码如下：

1
import numpy as np
2
import pandas as pd
3
from skimage import io, measure
4
from scipy.spatial import KDTree
5
from scipy.ndimage import binary_erosion
6
from joblib import Parallel, delayed
7
from itertools import combinations
8
import plotly.graph_objects as go
9
import time
10

11

12

13

14
def get_boundary_points(label_mask, max_points=1000, rng=None):
    """Return the boundary voxels of one object as ``(z, y, x)`` index arrays.

    A voxel belongs to the boundary when it is part of the object
    (``label_mask > 0``) and is removed by a single binary erosion step.

    Args:
        label_mask: 3D array that is non-zero inside the object.
        max_points: Upper bound on the number of voxels returned. When the
            boundary is larger, a random subset of this size is drawn without
            replacement, so distances derived from the result become
            approximate. Pass ``None`` to always return the full boundary.
        rng: Optional random source exposing ``choice`` (for example
            ``numpy.random.default_rng(seed)``) that makes the subsampling
            reproducible. ``None`` uses NumPy's global random state.

    Returns:
        Tuple ``(z, y, x)`` of equally long integer index arrays.
    """
    binary = label_mask > 0
    boundary = binary & ~binary_erosion(binary, iterations=1)
    z, y, x = np.where(boundary)
    if max_points is not None and len(x) > max_points:
        sampler = np.random if rng is None else rng
        indices = sampler.choice(len(x), max_points, replace=False)
        x, y, z = x[indices], y[indices], z[indices]
    return z, y, x
23

24

25
def process_pair(label1, label2, mask1, mask2, centroid1, centroid2):
    """Compute the smallest boundary-to-boundary gap between two objects.

    Boundary voxels of both masks are obtained with ``get_boundary_points``.
    A KD-tree built on the second object's boundary is then queried with
    every boundary voxel of the first.

    Args:
        label1: Label of the first object.
        label2: Label of the second object.
        mask1: Array that is non-zero inside the first object.
        mask2: Array that is non-zero inside the second object.
        centroid1: Centroid of the first object, indexed as ``(z, y, x)``.
        centroid2: Centroid of the second object, indexed as ``(z, y, x)``.

    Returns:
        A list of two 15-tuples, one per direction (1 -> 2 and 2 -> 1), laid
        out as ``(label, xc, yc, zc, d, label2, xc2, yc2, zc2, xe1, ye1, ze1,
        xe2, ye2, ze2)``.

    Raises:
        ValueError: If either mask yields no boundary voxels.
    """
    z1, y1, x1 = get_boundary_points(mask1)
    coords1 = np.column_stack((x1, y1, z1))
    z2, y2, x2 = get_boundary_points(mask2)
    coords2 = np.column_stack((x2, y2, z2))
    if len(coords1) == 0 or len(coords2) == 0:
        # Fail with a clear message instead of an opaque indexing error.
        raise ValueError(
            f"No boundary voxels found for label pair ({label1}, {label2})"
        )

    # Nearest voxel of object 2 for every boundary voxel of object 1.
    tree = KDTree(coords2)
    dists, nearest = tree.query(coords1)
    best = np.argmin(dists)
    min_dist = dists[best]
    point1 = coords1[best]
    point2 = coords2[nearest[best]]

    def as_row(label_a, centroid_a, point_a, label_b, centroid_b, point_b):
        """Lay out one directed record; centroids are (z, y, x), points (x, y, z)."""
        return (
            label_a, centroid_a[2], centroid_a[1], centroid_a[0],
            min_dist,
            label_b, centroid_b[2], centroid_b[1], centroid_b[0],
            point_a[0], point_a[1], point_a[2],
            point_b[0], point_b[1], point_b[2],
        )

    # Return data for both directions
    return [
        as_row(label1, centroid1, point1, label2, centroid2, point2),
        as_row(label2, centroid2, point2, label1, centroid1, point1),
    ]
47

48

49
def compute_nearest_distances(mask_path):
    """Find, for every labelled object in a mask, its nearest neighbour.

    The mask is read with ``skimage.io.imread``. Voxels equal to 0 are
    treated as background. Every pair of labels is handed to
    ``process_pair`` through a joblib ``Parallel`` pool (``loky`` backend,
    all cores).

    Args:
        mask_path: Path of the label image to read.

    Returns:
        A tuple ``(df, mask, label_to_prop)``:

        * ``df`` - one row per label, holding the pair with the smallest gap.
          Columns: ``label, xc, yc, zc, d, label2, xc2, yc2, zc2, xe1, ye1,
          ze1, xe2, ye2, ze2``. ``label`` and ``label2`` are integers; the
          remaining columns are float64.
        * ``mask`` - the loaded label array.
        * ``label_to_prop`` - dict mapping each label to its ``regionprops``
          entry.
    """
    mask = io.imread(mask_path)

    # Drop the background by value, not by position: slicing off the first
    # unique value would discard a real object when the mask contains no 0.
    labels = np.unique(mask)
    labels = labels[labels != 0]

    # Compute region properties (compatible with older scikit-image versions)
    props = measure.regionprops(mask)
    label_to_prop = {prop.label: prop for prop in props if prop.label > 0}

    # NOTE: this keeps one full-volume array per label in memory, and every
    # task receives two of them; memory use grows with labels x volume.
    label_masks = {label: (mask == label).astype(np.uint8) for label in labels}
    label_to_centroid = {label: prop.centroid for label, prop in label_to_prop.items()}

    # Parallel processing
    results = Parallel(n_jobs=-1, backend='loky')(
        delayed(process_pair)(
            label1,
            label2,
            label_masks[label1],
            label_masks[label2],
            label_to_centroid[label1],
            label_to_centroid[label2]
        )
        for label1, label2 in combinations(labels, 2)
    )

    columns = ['label', 'xc', 'yc', 'zc', 'd', 'label2', 'xc2', 'yc2', 'zc2',
               'xe1', 'ye1', 'ze1', 'xe2', 'ye2', 'ze2']

    # Each task returns two rows (one per direction); flatten them. The
    # reshape keeps a (0, 15) table when there are no pairs at all.
    rows = [row for pair_rows in results for row in pair_rows]
    data = np.array(rows, dtype=np.float64).reshape(-1, len(columns))

    df = pd.DataFrame(data, columns=columns)
    # Labels are identifiers, not measurements: restore integer dtype so the
    # CSV and plot legends show "1" rather than "1.0".
    df = df.astype({'label': np.int64, 'label2': np.int64})

    # Keep, for each label, only the neighbour with the smallest gap.
    df = df.loc[df.groupby('label')['d'].idxmin()].reset_index(drop=True)

    return df, mask, label_to_prop
94

95

96

97

98
def visualize_nearest_distances(df, mask, label_to_prop):
    """Build a Plotly 3D figure of every object and its nearest-gap segment.

    For each row of ``df`` three traces are added: a semi-transparent surface
    of the object (marching cubes on its binary mask), a text marker at the
    object's centroid, and a black segment joining the two closest boundary
    voxels.

    Args:
        df: Table with the columns ``label``, ``label2``, ``xe1, ye1, ze1``
            and ``xe2, ye2, ze2``, one row per object.
        mask: Label volume; voxels equal to a row's ``label`` form the object.
        label_to_prop: Mapping from label to an object exposing ``centroid``
            indexed as ``(z, y, x)``.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    # Palette is cycled when there are more objects than colours.
    colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']

    # Iterate the rows directly instead of re-filtering the frame per label.
    for idx, row in enumerate(df.itertuples(index=False)):
        # The label columns may be stored as floats; use ints so trace names
        # read "Object 1" / "Distance 1-2" instead of "Object 1.0" /
        # "Distance 1.0-2.0".
        label = int(row.label)
        neighbour = int(row.label2)

        binary_mask = (mask == label).astype(np.uint8)

        # Surface of the object; vertices come back in (z, y, x) order.
        verts, faces, _, _ = measure.marching_cubes(binary_mask, level=0.5)
        fig.add_trace(go.Mesh3d(
            x=verts[:, 2], y=verts[:, 1], z=verts[:, 0],
            i=faces[:, 0], j=faces[:, 1], k=faces[:, 2],
            color=colors[idx % len(colors)],
            opacity=0.5,
            name=f'Object {label}'
        ))

        # Label text at the centroid.
        centroid = label_to_prop[label].centroid
        fig.add_trace(go.Scatter3d(
            x=[centroid[2]], y=[centroid[1]], z=[centroid[0]],
            mode='text',
            text=[f'Label {label}'],
            textposition='middle center',
            showlegend=False
        ))

        # Segment between the closest boundary voxels of the two objects.
        fig.add_trace(go.Scatter3d(
            x=[row.xe1, row.xe2],
            y=[row.ye1, row.ye2],
            z=[row.ze1, row.ze2],
            mode='lines',
            line=dict(color='black', width=5),
            name=f'Distance {label}-{neighbour}'
        ))

    fig.update_layout(
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data'
        ),
        title='3D Objects with Nearest Neighbor Distances'
    )

    return fig
163

164

165
# Example usage
166
if __name__ == "__main__":
    # Path to your mask.tif file
    mask_path = 'test-mask.tif'

    # perf_counter is a monotonic clock, so the elapsed time cannot be
    # distorted by system clock adjustments.
    t0 = time.perf_counter()
    # Compute distances and get DataFrame
    df, mask, label_to_prop = compute_nearest_distances(mask_path)
    print(f"Time usage: {time.perf_counter()-t0:.2f} s.")
    # Time usage: 3.4 s for 9x100x100, 322.2 s for 9x256x256
    # Print DataFrame
    # print(df)
    df.to_csv("test-mask-gap-distance-results.csv", index=False)

    # Create and show visualization
    fig = visualize_nearest_distances(df, mask, label_to_prop)
    fig.show()

计算结果和可视化效果都和之前的一模一样。

经测试，当输入的 mask.tif 大小为 9x256x256 时代码仍能正常运行，具备一定可用性；如果是 9x2560x2560 则会报内存错误。不过后面可以进一步优化，比如使用移动窗口的方式遍历全局，或者是按对象遍历，应该还是具有较好的可用性的。

我和Grok具体的聊天记录详见此链接（需科学上网）。