YOLOv5-seg数据集制作、模型训练以及TensorRT部署

版本声明
一、数据集制作：图像 Json转txt
二、分割模型训练
三 tensorRT部署

版本声明

yolov5-seg:官方地址：https://github.com/ultralytics/yolov5/tree/v6.2
TensorRT：8.x.x
语言：C++
系统：ubuntu18.04

一、数据集制作：图像 Json转txt

前言：由于yolo仓中提供了标准coco的json文件转txt代码，因此需要将labelme的json文件转为coco json.

labelme JSON 转COCO JSON
使用labelme的CreatePolygons按钮开始绘制多边形，然后保存为json格式。

https://github.com/wkentaro/labelme/tree/master/examples/instance_segmentation.
在该链接中有个labelme2coco.py脚本，将该脚本下载下来后，执行以下指令即可。其中data_annotated是刚刚标注保存的json标签文件夹，data_dataset_coco是生成MS COCO数据类型的目录。
python labelme2coco.py data_annotated data_dataset_coco --labels label.txt

注意：由于自定义的数据集里面标签从0开始不包括背景直接转换会报错。修改72行。
在这里插入图片描述
生成三个文件JPEGImages、 Visualization 、annotations.json

JPEGImages中为原图，annotations.json里面是coco格式的文件：

在这里插入图片描述
Visualization中的图如下：
转换前需要自定义label.txt

COCO JSON转txt
coco128-seg提供了标准的训练格式，我们下载下来看看。[label]+[points]

下载链接link:https://github.com/ultralytics/JSON2YOLO
找到general_json2yolo.py文件，修改路径后直接运行会报错：
No such file or directory xxx/xxxxx/xxx.txt
排查过后发现是我们生成的annotations.json和标准的coco json有出入：(多了JPEGImages/)，修改代码313行：
标准的：

我们的：

再次运行，报下一个错误：
TypeError: must be real number, not NoneType
错误指向：
在这里插入图片描述
观察文件夹中，已经生成一个xxx.txt且有部分数据，打印line之后发现数据里有[None,point…point]这样的数据。大体知道了：应该是生成了背景类且没有标签。修改代码跳过这些标签：

再次运行报错消失，执行完毕没有报错。以为成功了打开txt一个最大的标签仅仅为13，应该是到15(我的数据集一共十六类)，中间有几类被消除了，排查错误。应该是这个地方把91–>80类的函数的问题。修改一番，两个地方。（若只修改第二处会出现-1标签，最高到14）

在这里插入图片描述
也可以只修改第二处：再修改代码：
下面展示一些 内联代码片。

cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
1

cls = coco80[ann['category_id']] if cls91to80 else ann['category_id'] - 1  # class
1

coco91_to_coco80_class()函数：

在这里插入图片描述
排除完毕以上错误时，再次运行，没有错误了。

import contextlib
import json

import cv2
import pandas as pd
from PIL import Image
from collections import defaultdict

from utils import *


# Convert INFOLKS JSON file into YOLO-format labels ----------------------------
def convert_infolks_json(name, files, img_path):
    # Create folders
    path = make_dirs()

    # Import json
    data = []
    for file in glob.glob(files):
        with open(file) as f:
            jdata = json.load(f)
            jdata['json_file'] = file
            data.append(jdata)

    # Write images and shapes
    name = path + os.sep + name
    file_id, file_name, wh, cat = [], [], [], []
    for x in tqdm(data, desc='Files and Shapes'):
        f = glob.glob(img_path + Path(x['json_file']).stem + '.*')[0]
        file_name.append(f)
        wh.append(exif_size(Image.open(f)))  # (width, height)
        cat.extend(a['classTitle'].lower() for a in x['output']['objects'])  # categories

        # filename
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

    # Write *.names file
    names = sorted(np.unique(cat))
    # names.pop(names.index('Missing product'))  # remove
    with open(name + '.names', 'a') as file:
        [file.write('%s\n' % a) for a in names]

    # Write labels file
    for i, x in enumerate(tqdm(data, desc='Annotations')):
        label_name = Path(file_name[i]).stem + '.txt'

        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['output']['objects']:
                # if a['classTitle'] == 'Missing product':
                #    continue  # skip

                category_id = names.index(a['classTitle'].lower())

                # The INFOLKS bounding box format is [x-min, y-min, x-max, y-max]
                box = np.array(a['points']['exterior'], dtype=np.float32).ravel()
                box[[0, 2]] /= wh[i][0]  # normalize x by width
                box[[1, 3]] /= wh[i][1]  # normalize y by height
                box = [box[[0, 2]].mean(), box[[1, 3]].mean(), box[2] - box[0], box[3] - box[1]]  # xywh
                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # Split data into train, test, and validate files
    split_files(name, file_name)
    write_data_data(name + '.data', nc=len(names))
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')


# Convert vott JSON file into YOLO-format labels -------------------------------
def convert_vott_json(name, files, img_path):
    # Create folders
    path = make_dirs()
    name = path + os.sep + name

    # Import json
    data = []
    for file in glob.glob(files):
        with open(file) as f:
            jdata = json.load(f)
            jdata['json_file'] = file
            data.append(jdata)

    # Get all categories
    file_name, wh, cat = [], [], []
    for i, x in enumerate(tqdm(data, desc='Files and Shapes')):
        with contextlib.suppress(Exception):
            cat.extend(a['tags'][0] for a in x['regions'])  # categories

    # Write *.names file
    names = sorted(pd.unique(cat))
    with open(name + '.names', 'a') as file:
        [file.write('%s\n' % a) for a in names]

    # Write labels file
    n1, n2 = 0, 0
    missing_images = []
    for i, x in enumerate(tqdm(data, desc='Annotations')):

        f = glob.glob(img_path + x['asset']['name'] + '.jpg')
        if len(f):
            f = f[0]
            file_name.append(f)
            wh = exif_size(Image.open(f))  # (width, height)

            n1 += 1
            if (len(f) > 0) and (wh[0] > 0) and (wh[1] > 0):
                n2 += 1

                # append filename to list
                with open(name + '.txt', 'a') as file:
                    file.write('%s\n' % f)

                # write labelsfile
                label_name = Path(f).stem + '.txt'
                with open(path + '/labels/' + label_name, 'a') as file:
                    for a in x['regions']:
                        category_id = names.index(a['tags'][0])

                        # The INFOLKS bounding box format is [x-min, y-min, x-max, y-max]
                        box = a['boundingBox']
                        box = np.array([box['left'], box['top'], box['width'], box['height']]).ravel()
                        box[[0, 2]] /= wh[0]  # normalize x by width
                        box[[1, 3]] /= wh[1]  # normalize y by height
                        box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3]]  # xywh

                        if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                            file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
        else:
            missing_images.append(x['asset']['name'])

    print('Attempted %g json imports, found %g images, imported %g annotations successfully' % (i, n1, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Split data into train, test, and validate files
    split_files(name, file_name)
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')


# Convert ath JSON file into YOLO-format labels --------------------------------
def convert_ath_json(json_dir):  # dir contains json annotations and images
    # Create folders
    dir = make_dirs()  # output directory

    jsons = []
    for dirpath, dirnames, filenames in os.walk(json_dir):
        jsons.extend(
            os.path.join(dirpath, filename)
            for filename in [
                f for f in filenames if f.lower().endswith('.json')
            ]
        )

    # Import json
    n1, n2, n3 = 0, 0, 0
    missing_images, file_name = [], []
    for json_file in sorted(jsons):
        with open(json_file) as f:
            data = json.load(f)

        # # Get classes
        # try:
        #     classes = list(data['_via_attributes']['region']['class']['options'].values())  # classes
        # except:
        #     classes = list(data['_via_attributes']['region']['Class']['options'].values())  # classes

        # # Write *.names file
        # names = pd.unique(classes)  # preserves sort order
        # with open(dir + 'data.names', 'w') as f:
        #     [f.write('%s\n' % a) for a in names]

        # Write labels file
        for x in tqdm(data['_via_img_metadata'].values(), desc=f'Processing {json_file}'):
            image_file = str(Path(json_file).parent / x['filename'])
            f = glob.glob(image_file)  # image file
            if len(f):
                f = f[0]
                file_name.append(f)
                wh = exif_size(Image.open(f))  # (width, height)

                n1 += 1  # all images
                if len(f) > 0 and wh[0] > 0 and wh[1] > 0:
                    label_file = dir + 'labels/' + Path(f).stem + '.txt'

                    nlabels = 0
                    try:
                        with open(label_file, 'a') as file:  # write labelsfile
                            # try:
                            #     category_id = int(a['region_attributes']['class'])
                            # except:
                            #     category_id = int(a['region_attributes']['Class'])
                            category_id = 0  # single-class

                            for a in x['regions']:
                                # bounding box format is [x-min, y-min, x-max, y-max]
                                box = a['shape_attributes']
                                box = np.array([box['x'], box['y'], box['width'], box['height']],
                                               dtype=np.float32).ravel()
                                box[[0, 2]] /= wh[0]  # normalize x by width
                                box[[1, 3]] /= wh[1]  # normalize y by height
                                box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2],
                                       box[3]]  # xywh (left-top to center x-y)

                                if box[2] > 0. and box[3] > 0.:  # if w > 0 and h > 0
                                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
                                    n3 += 1
                                    nlabels += 1

                        if nlabels == 0:  # remove non-labelled images from dataset
                            os.system(f'rm {label_file}')
                            # print('no labels for %s' % f)
                            continue  # next file

                        # write image
                        img_size = 4096  # resize to maximum
                        img = cv2.imread(f)  # BGR
                        assert img is not None, 'Image Not Found ' + f
                        r = img_size / max(img.shape)  # size ratio
                        if r < 1:  # downsize if necessary
                            h, w, _ = img.shape
                            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)

                        ifile = dir + 'images/' + Path(f).name
                        if cv2.imwrite(ifile, img):  # if success append image to list
                            with open(dir + 'data.txt', 'a') as file:
                                file.write('%s\n' % ifile)
                            n2 += 1  # correct images

                    except Exception:
                        os.system(f'rm {label_file}')
                        print(f'problem with {f}')

            else:
                missing_images.append(image_file)

    nm = len(missing_images)  # number missing
    print('\nFound %g JSONs with %g labels over %g images. Found %g images, labelled %g images successfully' %
          (len(jsons), n3, n1, n1 - nm, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Write *.names file
    names = ['knife']  # preserves sort order
    with open(dir + 'data.names', 'w') as f:
        [f.write('%s\n' % a) for a in names]

    # Split data into train, test, and validate files
    split_rows_simple(dir + 'data.txt')
    write_data_data(dir + 'data.data', nc=1)
    print(f'Done. Output saved to {Path(dir).absolute()}')


def convert_coco_json(json_dir='../coco/annotations/', use_segments=False, cls91to80=False):
    save_dir = make_dirs()  # output directory
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
        fn.mkdir()
        with open(json_file) as f:
            data = json.load(f)
            print(data)

        # Create image dict
        images = {'%g' % x['id']: x for x in data['images']}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)

        # Write labels file
        for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
            img = images['%g' % img_id]
            h, w, f = img['height'], img['width'], img['file_name']

            bboxes = []
            segments = []
            for ann in anns:
                if ann['iscrowd']:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann['bbox'], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # if w <= 0 and h <= 0
                    continue

                #cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
                '''这个地方把91类别转80类别关了，因为自己的建立的数据集不需要转变'''
                '''直接将cls=category_id'''
                cls = ann['category_id']
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                # Segments
                if use_segments:
                    if len(ann['segmentation']) > 1:
                        s = merge_multi_segment(ann['segmentation'])
                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                    else:
                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                    s = [cls] + s
                    if s not in segments:
                        segments.append(s)

            # Write
            print("fn/f==>",fn/f[11:])
            print("fn==>",fn)
            print("f==>",f)
            with open((fn / f[11:]).with_suffix('.txt'), 'a') as file:
                print(len(bboxes))
                for i in range(len(bboxes)):
                    print("seg:",segments)
                    line = *(segments[i] if use_segments else bboxes[i]),  # cls, box or segments
                    print("line:==>",line)
                    if(line[0]==None):
                        continue
                    file.write(('%g ' * len(line)).rstrip() % line + '\n')


def min_index(arr1, arr2):
    """Find a pair of indexes with the shortest distance. 
    Args:
        arr1: (N, 2).
        arr2: (M, 2).
    Return:
        a pair of indexes(tuple).
    """
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)


def merge_multi_segment(segments):
    """Merge multi segments to one list.
    Find the coordinates with min distance between each segment,
    then connect these coordinates with one thin line to merge all 
    segments into one.

    Args:
        segments(List(List)): original segmentations in coco's json file.
            like [segmentation1, segmentation2,...], 
            each segmentation is a list of coordinates.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    # use two round to connect all the segments
    for k in range(2):
        # forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # middle segments have two indexes
                # reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0]:idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s


def delete_dsstore(path='../datasets'):
    # Delete apple .DS_store files
    from pathlib import Path
    files = list(Path(path).rglob('.DS_store'))
    print(files)
    for f in files:
        f.unlink()


if __name__ == '__main__':
    source = 'COCO'

    if source == 'COCO':
        convert_coco_json('写自己的路径',  # directory with *.json
                          use_segments=True,
                          cls91to80=False)

    elif source == 'infolks':  # Infolks https://infolks.info/
        convert_infolks_json(name='out',
                             files='../data/sm4/json/*.json',
                             img_path='../data/sm4/images/')

    elif source == 'vott':  # VoTT https://github.com/microsoft/VoTT
        convert_vott_json(name='data',
                          files='../../Downloads/athena_day/20190715/*.json',
                          img_path='../../Downloads/athena_day/20190715/')  # images folder

    elif source == 'ath':  # ath format
        convert_ath_json(json_dir='../../Downloads/athena/')  # images folder

    # zip results
    # os.system('zip -r ../coco.zip ../coco')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

二、分割模型训练

训练的步骤和目标检测模型一致，下载模型 yolov5s-seg.pt，划分数据集、修改配置文件、不再详述了。
在这里插入图片描述

三 tensorRT部署

1 模型导出

使用官方的export.py文件直接导出时，netron可视化之后如下：
在这里插入图片描述
onnx比较混乱，需要进一步修改，所有的修改如下，参考杜老的仓link:https://github.com/shouxieai/learning-cuda-trt/tree/main：

# line 55 forward function in yolov5/models/yolo.py 
# bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
# x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# modified into:

bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
bs = -1
ny = int(ny)
nx = int(nx)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

# line 70 in yolov5/models/yolo.py
#  z.append(y.view(bs, -1, self.no))
# modified into：
z.append(y.view(bs, self.na * ny * nx, self.no))

############# for yolov5-6.0 #####################
# line 65 in yolov5/models/yolo.py
# if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
#    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
# modified into:
if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

# disconnect for pytorch trace
anchor_grid = (self.anchors[i].clone() * self.stride[i]).view(1, -1, 1, 1, 2)

# line 70 in yolov5/models/yolo.py
# y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
# modified into:
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh

# line 73 in yolov5/models/yolo.py
# wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
# modified into:
wh = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh
############# for yolov5-6.0 #####################

# line 77 in yolov5/models/yolo.py
# return x if self.training else (torch.cat(z, 1), x)
# modified into:
return x if self.training else torch.cat(z, 1)

# line 52 in yolov5/export.py
# torch.onnx.export(dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # shape(1,3,640,640)
#                                'output': {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)  修改为
# modified into:
torch.onnx.export(dynamic_axes={'images': {0: 'batch'},  # shape(1,3,640,640)
                                'output': {0: 'batch'}  # shape(1,25200,85) 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

由于版本不同修改的地方也稍有改变
修改后：
在这里插入图片描述
导出指令：python export.py --weights runs/train-seg/exp3/weights/best.pt --include onnx --dynamic

2 onnx转trtmodel

TRT::compile(
            mode,                       // FP32、FP16、INT8
            test_batch_size,            // max batch size
            onnx_file,                  // source 
            model_file,                 // save to
            {},
            int8process,
            "inference"
        );
1
2
3
4
5
6
7
8
9

在这里插入图片描述

3 推理部分

static void inference(Type type, TRT::Mode mode, const string& model_file){

    auto engine = TRT::load_infer(model_file);
    if(engine == nullptr){
        INFOE("Engine is nullptr");
        return;
    }
     auto image      = cv::imread("xxx.jpg");
    //绘制结果
    int col=image.cols; //1920
    int row=image.rows; //1080
  
    Mat mask_seg=image.clone();
    Mat mask_box=image.clone();//3 channel
    Mat cut_img=image.clone();
	auto input      = engine->tensor("images");   // engine->input(0);
    auto output     = engine->tensor("output0");  // engine->output(1);//[batch , 32130 , 53]
    auto output1    = engine->tensor("output1"); //  (batch, 32, 136, 240) ==>(16,32,136,240)
   	int num_bboxes  = output->size(1);//32130


    int num_classes = output->size(2) - 5 ;
    float confidence_threshold = 0.5;
    float nms_threshold        = 0.45;
    int MAX_IMAGE_BBOX         = 1000;
    int NUM_BOX_ELEMENT        = 39;  // left, top, right, bottom, confidence, class, keepflag ,32 mask
    int netWidth               = 640;
    int netHeigh               = 640;
    int segWidth               = 160;
    int segHeight              = 160;
    float mask_thresh          = 0.2;

    TRT::Tensor output_array_device(TRT::DataType::Float);
  

    // use max = 1 batch to inference.
    int max_batch_size = 1;
    input->resize_single_dim(0, max_batch_size).to_gpu();  
    output_array_device.resize(max_batch_size, 1 + MAX_IMAGE_BBOX * NUM_BOX_ELEMENT).to_gpu(); 
    output_array_device.set_stream(engine->get_stream());


    // set batch = 1  image
    int ibatch = 0;
    image_to_tensor(image, input, type, ibatch);

    // do async 异步
    engine->forward(false);

	float* output_ptr = output1->cpu<float>();
	//vector 2 mat
    int size[]={32,segHeight,segWidth};
    //cout<<"size"<
    cv::Mat mask_protos = cv::Mat_<float>(3,size,CV_8UC1);
    for(int iii=0;iii<32;iii++)
    {   
        //unchar *data=mask_protos.ptr(iii);
        for(int jjj=0;jjj<segHeight;jjj++)
        {
            //unchar *data2=data.ptr(jjj);
            for(int kkk=0;kkk<segWidth;kkk++)
            {
                //data2[kkk]=output_ptr[iii*136*240+jjj*240+kkk];
                mask_protos.at<float>(iii,jjj,kkk)=output_ptr[iii*segHeight*segWidth+jjj*segWidth+kkk];
            }
        }
    }
   


	float* d2i_affine_matrix = static_cast<float*>(input->get_workspace()->gpu());
    Yolo::decode_kernel_invoker(
        output->gpu<float>(ibatch),
        num_bboxes, num_classes,
        confidence_threshold,
        d2i_affine_matrix, output_array_device.gpu<float>(ibatch),
        MAX_IMAGE_BBOX, engine->get_stream()
    );
	
    Yolo::nms_kernel_invoker(
        output_array_device.gpu<float>(ibatch),
        nms_threshold, 
        MAX_IMAGE_BBOX, engine->get_stream()
    );
	    
	    
    float* parray = output_array_device.cpu<float>();
    int num_box = min(static_cast<int>(*parray), MAX_IMAGE_BBOX);//取最小值
  

    //new a mat  and new a vector
    Mat mask_proposals;
    vector<OutputSeg> f_output;
    vector<vector<float>>proposal;  //[23,32] output0  =>mask
    

    int num_box1=0;
    Rect holeImgRect(0, 0, col, row);

    for(int i = 0; i < num_box; ++i){ //遍历所有的框
        float* pbox  = parray + 1 + i * NUM_BOX_ELEMENT;//+1+i*7  1：表示这个数组的元素数量
        int keepflag = pbox[6];
        
       vector<float> temp;
       OutputSeg result;

        if(keepflag == 1 ){
            num_box1+=1;
            // left,      top,     right,  bottom, confidence,class, keepflag
            // pbox[0], pbox[1], pbox[2], pbox[3], pbox[4], pbox[5], pbox[6]
            float left       = pbox[0];
            float top        = pbox[1];
            float right      = pbox[2];
            float bottom     = pbox[3];
            float confidence = pbox[4];
            for(int ii=0;ii<32;ii++)
            {
                temp.push_back(pbox[ii+7]);
            }

            proposal.push_back(temp);
            result.id=pbox[5];
            result.confidence=pbox[4];
            cv::Rect rect(left, top, right-left, bottom-top);
            result.box=rect & holeImgRect;//; //x,y,w,h
            f_output.push_back(result);
            int label = static_cast<int>(pbox[5]);

            uint8_t b, g, r;
            tie(b, g, r) = iLogger::random_color(label);
            cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(b, g, r), 3);

            auto name    = cocolabels[label];
            auto caption = iLogger::format("%s %.2f", name, confidence);
            int width    = cv::getTextSize(caption, 0, 1, 1, nullptr).width + 10;
            cv::rectangle(image, cv::Point(left-3, top-33), cv::Point(left + width, top), cv::Scalar(b, g, r), -1);
            cv::putText(image, caption, cv::Point(left, top-5), 0, 1, cv::Scalar::all(0), 2, 16);
          }
//对应于python中的process_mask
    //vector2mat
    for (int i = 0; i < proposal.size(); ++i)
    {mask_proposals.push_back(Mat(proposal[i]).t());}

/获取 proto 也就是output1的输出


    //逻辑 GetMask
    Vec4d params;  //根据实际图片输入 和 onnx模型输入输出 计算的，此处直接写死
    params[0]=0.5;
    params[1]=0.5;
    params[2]=0.0;
    params[3]=2.0;
    Mat protos = mask_protos.reshape(0, {32,136 * 240});
    Mat matmulRes = ( mask_proposals * protos).t(); //23,32 * 32,32640 ==> 23,32640
    Mat masks = matmulRes.reshape(proposal.size(),{136,240}); //上一步骤作转置的原因：//Mat Mat::reshape(int cn,int rows=0) const cn:表示通道数（channels），如果设置为0，则表示通道不变；
    vector<Mat> maskChannels; //分离通道
	split(masks, maskChannels);
    for (int index = 0; index < f_output.size(); ++index) {
        Mat dest,mask;
        //sigmoid
        cv::exp(-maskChannels[index],dest);//e^x
        dest= 1.0/(1.0 + dest);
        //_netWidth = 960; _netHeight=544;  //ONNX图片输入宽度\高度  //	const int _segWidth = 240;
		Rect roi(int(params[2] / netWidth * segWidth), int(params[3] / netHeigh * segHeight), int(segWidth - params[2] / 2), int(segHeight- 0/2)); //136-params[3]/2最后一个参数改了 mask会有偏移
		dest = dest(roi);
		resize(dest, mask, cv::Size(col,row), INTER_LINEAR);//srcImgShape （1920，1080）//INTER_NEAREST 最近临插值  PYTHON中用的就是 INTER_LINEAR - 双线性插值
        //crop
		Rect temp_rect = f_output[index].box;
		mask = mask(temp_rect) > mask_thresh; //mask_threshg mask阈值
		f_output[index].boxMask =mask;
    }

    //DrawPred 绘制结果
    for (int i=0;i<f_output.size();i++)
    {
        int lf, tp,wd,hg;
        float confidence;
        lf=f_output[i].box.x;
        tp=f_output[i].box.y;
        wd=f_output[i].box.width;
        hg=f_output[i].box.height;
        confidence=f_output[i].confidence;
        
        int label = static_cast<int>(f_output[i].id);
        
        //生成随机颜色
        uint8_t b, g, r;
        tie(b, g, r) = iLogger::random_color(label);
        cv::rectangle(mask_box, cv::Point(lf, tp), cv::Point(lf+wd, tp+hg), cv::Scalar(b, g, r), 3);//绘制box框

        auto name    = cocolabels[label];
        auto caption = iLogger::format("%s %.2f", name, confidence);
        int width    = cv::getTextSize(caption, 0, 1, 1, nullptr).width + 10;
        cv::rectangle(mask_box, cv::Point(lf-3, tp-33), cv::Point(lf + width, tp), cv::Scalar(b, g, r), -1);//绘制label的框
        cv::putText(mask_box, caption, cv::Point(lf, tp-5), 0, 1, cv::Scalar::all(0), 2, 16);
		mask_seg(f_output[i].box).setTo(cv::Scalar(b, g, r), f_output[i].boxMask);//绘制mask
       
    }
    addWeighted(mask_box, 0.6, mask_seg, 0.4, 0, mask_box); //将mask加在原图上面 
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

效果展示：
在这里插入图片描述

相关阅读:
物联网安防-园区周界安防技术实现
 js 判断两个object相等
 10.26 来 CNCC 2023 T16 展位，TDengine 精美周边等你来领！
Maven了解&运用配置
 为什么NFT在加密货币的文艺复兴中不可或缺？
TypeReference使用笔记
 电容笔品牌哪个牌子好？电容笔质量最好的品牌推荐
 2-1两数之和
 【题解】P8817 [CSP-S 2022] 假期计划（bfs，dfs）
【Glide 】框架在内存使用方面的优化
原文地址：https://blog.csdn.net/qq_40198848/article/details/127937648