ICNet用于实时语义分割
ICNet 被广泛应用于实时语义分割领域。它在处理图像数据时,能够以较高的效率进行语义分割操作,为相关领域的研究和实际应用提供了有力的支持。ICNet 的实时性使其在众多场景中都具有很大的优势,例如在视频处理、自动驾驶等对实时性要求较高的领域,ICNet 能够快速准确地对图像进行语义分割,为后续的决策和处理提供关键信息。
如果你对MindSpore感兴趣,可以关注昇思MindSpore社区
一、环境准备
1.进入ModelArts官网
云平台帮助用户快速创建和部署模型,管理全周期AI工作流,选择下面的云平台以开始使用昇思MindSpore,获取安装命令,安装MindSpore2.0.0-alpha版本,可以在昇思教程中进入ModelArts官网
选择下方CodeLab立刻体验
等待环境搭建完成
2.使用CodeLab体验Notebook实例
选择ModelArts Upload Files上传Git文件,地址为GitHub - yfjcode/ICNet: mindspore icnet model
选择Kernel环境
切换至GPU环境,切换成第一个限时免费
进入昇思MindSpore官网,点击上方的安装
获取安装命令
回到Notebook中,在第一块代码前加入命令
- conda update -n base -c defaults conda
复制代码
安装MindSpore 2.0 GPU版本
- conda install mindspore=2.0.0a0 -c mindspore -c conda-forge
复制代码
安装mindvision
安装下载download
二、应用体验
1.模型准备
根据原作者提示
环境准备与数据读取 本案例基于MindSpore-CPU版本实现,在CPU上完成模型训练。
案例实现所使用的数据:Cityscape Dataset Website
为了下载数据集,我们首先需要在Cityscapes数据集官网进行注册,并且最好使用edu教育邮箱进行注册,此后等待几天,就可以下载数据集了,这里我们下载了两个文件:gtFine_trainvaltest.zip和leftImg8bit_trainvaltest.zip (11GB)。
下载完成后,我们对数据集压缩文件进行解压,文件的目录结构如下所示。
由于我们是在CPU上跑的,本来数据集有1个多G,全部拿来跑的话,很容易掉卡,故我们就选择一个城市的一些图片完成。
首先要处理数据,生成对应的.mindrecord 和 .mindrecord.db文件
需要注意的是,在生成这两个文件之前,我们要创建一个文件夹,用cityscapes_mindrecord命名,放在cityscapes文件夹的同级目录下,并且要保持cityscapes_mindrecord文件夹里面为空
下面是构建数据集的代码:注意,要保持cityscapes_mindrecord文件夹里面为空,报错可能是文件夹已经有文件了,文件夹地址为:/home/ma-user/work/ICNet/data/cityscapes_mindrecord
需要删掉/data/cityscapes_mindrecord文件

删掉文件后,需要修改路径,删掉/home/ma-user/work/ICNet,用./替换,之后直接运行代码块即可
- """Prepare Cityscapes dataset"""
- import os
- import random
- import argparse
- import numpy as np
- from PIL import Image
- from PIL import ImageOps
- from PIL import ImageFilter
- import mindspore.dataset as de
- from mindspore.mindrecord import FileWriter
- import mindspore.dataset.vision as transforms
- import mindspore.dataset.transforms as tc
def _get_city_pairs(folder, split='train'):
    """Return two parallel lists of image paths and label-mask paths.

    Args:
        folder: Dataset root containing ``leftImg8bit/<split>`` and
            ``gtFine/<split>`` sub-directories.
        split: Either ``'train'`` or ``'val'``.

    Returns:
        ``(img_paths, mask_paths)``; entry ``i`` of both lists refers to the
        same sample. Images without a matching mask file are reported on
        stdout and skipped.

    Raises:
        ValueError: If ``split`` is not a supported value.
    """
    def get_path_pairs(image_folder, masks_folder):
        image_paths = []
        masks_paths = []
        for root, dirs, files in os.walk(image_folder):
            # Sort in place so the traversal (and therefore the record order)
            # is reproducible across file systems.
            dirs.sort()
            for filename in sorted(files):
                if not filename.endswith('.png'):
                    continue
                imgpath = os.path.join(root, filename)
                # The mask lives in a folder with the same name as the
                # image's parent folder.
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(masks_folder, foldername, maskname)
                if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                    image_paths.append(imgpath)
                    masks_paths.append(maskpath)
                else:
                    print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(image_paths), image_folder))
        return image_paths, masks_paths

    if split not in ('train', 'val'):
        raise ValueError("unsupported split {!r}; expected 'train' or 'val'".format(split))
    # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val"
    img_folder = os.path.join(folder, 'leftImg8bit/' + split)
    # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val"
    mask_folder = os.path.join(folder, 'gtFine/' + split)
    # The order of img_paths and mask_paths is one-to-one correspondence
    img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
    return img_paths, mask_paths
def _sync_transform(img, mask):
    """Apply the same random augmentation to an image and its label mask.

    Steps, in order: random horizontal flip, random rescale of the short
    edge, zero padding up to the crop size, random square crop, optional
    Gaussian blur (image only), then ``_img_mask_transform``.

    Args:
        img: PIL image.
        mask: PIL label mask with the same size as ``img``.

    Returns:
        Whatever ``_img_mask_transform`` returns for the augmented pair.
    """
    base_size = 1024
    crop_size = 960
    # random mirror
    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
        mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
    # random scale (short edge)
    short_size = random.randint(int(base_size * 0.5), int(base_size * 2.0))
    w, h = img.size
    if h > w:
        ow = short_size
        oh = int(1.0 * h * ow / w)
    else:
        oh = short_size
        ow = int(1.0 * w * oh / h)
    # Nearest-neighbour for the mask so that no new label ids are invented.
    img = img.resize((ow, oh), Image.BILINEAR)
    mask = mask.resize((ow, oh), Image.NEAREST)
    # pad crop
    if short_size < crop_size:
        padh = crop_size - oh if oh < crop_size else 0
        padw = crop_size - ow if ow < crop_size else 0
        img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
        mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
    # random crop crop_size
    w, h = img.size
    x1 = random.randint(0, w - crop_size)
    y1 = random.randint(0, h - crop_size)
    crop_box = (x1, y1, x1 + crop_size, y1 + crop_size)
    img = img.crop(crop_box)
    mask = mask.crop(crop_box)
    # gaussian blur as in PSP
    if random.random() < 0.5:
        img = img.filter(ImageFilter.GaussianBlur(radius=random.random()))
    # final transform
    return _img_mask_transform(img, mask)
def _class_to_index(mask):
    """Map raw label ids in ``mask`` to training class indices.

    Label ids -1..33 are looked up in ``_key``; ids that do not belong to
    one of the 19 training classes map to -1.

    Args:
        mask: numpy array of label ids.

    Returns:
        Array with the same shape as ``mask`` holding values in -1..18.

    Raises:
        ValueError: If ``mask`` contains a value outside -1..33.
    """
    # Reference:
    # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py
    _key = np.array([-1, -1, -1, -1, -1, -1,
                     -1, -1, 0, 1, -1, -1,
                     2, 3, 4, -1, -1, -1,
                     5, -1, 6, 7, 8, 9,
                     10, 11, 12, 13, 14, 15,
                     -1, -1, 16, 17, 18])
    # [-1, ..., 33]
    _mapping = np.arange(-1, len(_key) - 1, dtype='int32')
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    unexpected = np.setdiff1d(np.unique(mask), _mapping)
    if unexpected.size:
        raise ValueError('unexpected label ids in mask: {}'.format(unexpected.tolist()))
    # Get the index of each pixel value in the mask corresponding to _mapping
    index = np.digitize(mask.ravel(), _mapping, right=True)
    # Look the positions up in _key and restore the original shape
    return _key[index].reshape(mask.shape)
def _img_transform(img):
    """Convert ``img`` to a numpy array."""
    return np.array(img)
def _mask_transform(mask):
    """Convert a label mask to an int32 array of training class indices."""
    target = _class_to_index(np.array(mask).astype('int32'))
    return np.array(target).astype('int32')
def _img_mask_transform(img, mask):
    """Turn an image/mask pair into float32 numpy arrays for training.

    The image goes through ``ToTensor`` followed by ``Normalize`` with the
    per-channel mean/std given below; the mask goes through
    ``_mask_transform``.

    Returns:
        ``(img, mask)`` as float32 numpy arrays.
    """
    input_transform = tc.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225), is_hwc=False)])
    img = _img_transform(img)
    mask = _mask_transform(mask)
    img = input_transform(img)
    img = np.array(img).astype(np.float32)
    mask = np.array(mask).astype(np.float32)
    return (img, mask)
def data_to_mindrecord_img(prefix='cityscapes-2975.mindrecord', file_num=1,
                           root='./', split='train', mindrecord_dir="./"):
    """Write every image/mask pair of a dataset split into a MindRecord file.

    Args:
        prefix: File name of the MindRecord file to create.
        file_num: Second positional argument passed to ``FileWriter``.
        root: Dataset root handed to ``_get_city_pairs``.
        split: Dataset split handed to ``_get_city_pairs``.
        mindrecord_dir: Directory that receives the MindRecord file; it is
            created when missing.
    """
    if mindrecord_dir:
        os.makedirs(mindrecord_dir, exist_ok=True)
    mindrecord_path = os.path.join(mindrecord_dir, prefix)
    writter = FileWriter(mindrecord_path, file_num)
    img_paths, mask_paths = _get_city_pairs(root, split)
    # NOTE(review): the schema fixes the shape to 1024x2048 images; confirm
    # all inputs have that size.
    cityscapes_json = {
        "images": {"type": "int32", "shape": [1024, 2048, 3]},
        "mask": {"type": "int32", "shape": [1024, 2048]},
    }
    writter.add_schema(cityscapes_json, "cityscapes_json")
    images_files_num = len(img_paths)
    for count, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths), start=1):
        # Context managers release the file handles as soon as the pixel
        # data has been copied into numpy arrays.
        with Image.open(img_path) as img_file:
            img = np.array(img_file.convert('RGB'), dtype=np.int32)
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file, dtype=np.int32)
        row = {"images": img, "mask": mask}
        if count % 10 == 0:
            print("writing {}/{} into mindrecord".format(count, images_files_num))
        writter.write_raw_data([row])
    writter.commit()
def get_Image_crop_nor(img, mask):
    """Convert array inputs to PIL images and run ``_sync_transform`` on them.

    Args:
        img: Array-like image data; cast to uint8 before conversion.
        mask: Array-like label mask; cast to uint8 before conversion.

    Returns:
        The result of ``_sync_transform`` for the converted pair.
    """
    image = Image.fromarray(np.uint8(img))
    mask = Image.fromarray(np.uint8(mask))
    return _sync_transform(image, mask)
def create_icnet_dataset(mindrecord_file, batch_size=16, device_num=1, rank_id=0):
    """Create the training dataset from a MindRecord file.

    Args:
        mindrecord_file: Path of the MindRecord file to read.
        batch_size: Number of samples per batch; the last partial batch is kept.
        device_num: Number of shards the dataset is divided into.
        rank_id: Shard id read by this process.

    Returns:
        A shuffled, batched dataset with columns ``image`` and ``masks``,
        produced by mapping ``get_Image_crop_nor`` over each sample.
    """
    ds = de.MindDataset(mindrecord_file, columns_list=["images", "mask"],
                        num_shards=device_num, shard_id=rank_id, shuffle=True)
    ds = ds.map(operations=get_Image_crop_nor, input_columns=["images", "mask"],
                output_columns=["image", "masks"])
    ds = ds.batch(batch_size=batch_size, drop_remainder=False)
    return ds
# Paths are relative to the notebook's working directory.
dataset_path = "./data/cityscapes/"
mindrecord_path = "./data/cityscapes_mindrecord/"
data_to_mindrecord_img(root=dataset_path, mindrecord_dir=mindrecord_path)
# Command-line variant, kept for reference:
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(description="dataset_to_mindrecord")
#     parser.add_argument("--dataset_path", type=str, default="/home/ma-user/work/ICNet/data/cityscapes/", help="dataset path")
#     parser.add_argument("--mindrecord_path", type=str, default="/home/ma-user/work/ICNet/data/cityscapes_mindrecord/",
#                         help="mindrecord_path")
#     args_opt = parser.parse_args()
#     data_to_mindrecord_img(root=args_opt.dataset_path, mindrecord_dir=args_opt.mindrecord_path)
复制代码 可以看到已经生成的对应的数据集文件,然后我们创建稍后用到的数据

注意修改路径
# Build the training dataset from the MindRecord file written earlier.
prefix = 'cityscapes-2975.mindrecord'
train_mindrecord_dir = "/home/ma-user/work/ICNet/data/cityscapes_mindrecord"
train_train_batch_size_percard = 4
device_num = 1
rank_id = 0
mindrecord_dir = train_mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix)
print("mindrecord_file", mindrecord_file)
dataset = create_icnet_dataset(mindrecord_file, batch_size=train_train_batch_size_percard,
                               device_num=device_num, rank_id=rank_id)
print(dataset)
复制代码
2.模型构建
创建需要训练模型的一些参数:(这里只是展示,不运行,具体参数运行在后面)
1.Model
model: name: "icnet" backbone: "resnet50v1" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training
2.Optimizer
optimizer: init_lr: 0.02 momentum: 0.9 weight_decay: 0.0001
3.Training
train: train_batch_size_percard: 4 valid_batch_size: 1 cityscapes_root: "/data/cityscapes/" epochs: 10 val_epoch: 1 # run validation every val-epoch ckpt_dir: "./ckpt/" # ckpt and training log will be saved here mindrecord_dir: '/home/ma-user/work/ICNet/data/cityscapes_mindrecord' pretrained_model_path: '/home/ma-user/work/ICNet/root/cacheckpt/resnet50-icnet-150_2.ckpt' save_checkpoint_epochs: 5 keep_checkpoint_max: 10
4.Valid
test: ckpt_path: "" # set the pretrained model path correctly
注意修改路径
# Training length and the paths used by the training cell below.
train_epochs = 10
train_data_size = dataset.get_dataset_size()
print("data_size", train_data_size)
epoch = train_epochs
project_path = "/home/ma-user/work/ICNet/"
train_pretrained_model_path = "/home/ma-user/work/ICNet/root/cacheckpt/resnet50-icnet-150_2.ckpt"
复制代码
- import mindspore as ms
- import mindspore.nn as nn
- import mindspore.ops as ops
- from src.loss import ICNetLoss
- from src.models.resnet50_v1 import get_resnet50v1b
- __all__ = ['ICNetdc']
class ICNetdc(nn.Cell):
    """Image Cascade Network.

    Args:
        nclass: Number of output classes.
        pretrained_path: Checkpoint location forwarded to ``SegBaseModel``.
        istraining: When True, ``construct`` returns the loss; otherwise it
            returns the head outputs.
        norm_layer: Normalisation layer class used by the conv blocks.
    """

    def __init__(self, nclass=19, pretrained_path="", istraining=True, norm_layer=nn.SyncBatchNorm):
        super(ICNetdc, self).__init__()
        self.conv_sub1 = nn.SequentialCell(
            _ConvBNReLU(3, 32, 3, 2, norm_layer=norm_layer),
            _ConvBNReLU(32, 32, 3, 2, norm_layer=norm_layer),
            _ConvBNReLU(32, 64, 3, 2, norm_layer=norm_layer)
        )
        self.istraining = istraining
        self.ppm = PyramidPoolingModule()
        self.backbone = SegBaseModel(root=pretrained_path, istraining=istraining)
        self.head = _ICHead(nclass, norm_layer=norm_layer)
        self.loss = ICNetLoss()
        self.resize_bilinear = nn.ResizeBilinear()
        self.__setattr__('exclusive', ['conv_sub1', 'head'])

    def construct(self, x, y):
        """Run the three branches and return the loss (training) or the head outputs."""
        if x.shape[0] != 1:
            x = x.squeeze()
        # sub 1
        x_sub1 = self.conv_sub1(x)
        h, w = x.shape[2:]
        # sub 2
        # Floor division keeps the target size integral; ``/`` yields floats.
        x_sub2 = self.resize_bilinear(x, size=(h // 2, w // 2))
        _, x_sub2, _, _ = self.backbone(x_sub2)
        # sub 4
        _, _, _, x_sub4 = self.backbone(x)
        # add PyramidPoolingModule
        x_sub4 = self.ppm(x_sub4)
        output = self.head(x_sub1, x_sub2, x_sub4)
        if self.istraining:
            outputs = self.loss(output, y)
        else:
            outputs = output
        return outputs
class PyramidPoolingModule(nn.Cell):
    """Pyramid pooling: add pooled-and-upsampled context to the input feature map.

    ``pyramids`` is accepted but not used by this implementation.
    """

    def __init__(self, pyramids=None):
        super(PyramidPoolingModule, self).__init__()
        self.avgpool = ops.ReduceMean(keep_dims=True)
        self.pool2 = nn.AvgPool2d(kernel_size=15, stride=15)
        self.pool3 = nn.AvgPool2d(kernel_size=10, stride=10)
        self.pool6 = nn.AvgPool2d(kernel_size=5, stride=5)
        self.resize_bilinear = nn.ResizeBilinear()

    def construct(self, x):
        """Return ``x`` plus four pooled versions of ``x`` resized back to its size."""
        feat = x
        height, width = x.shape[2:]
        # Mean over axes 2 and 3, then the three fixed-window average pools.
        x1 = self.avgpool(x, (2, 3))
        x1 = self.resize_bilinear(x1, size=(height, width), align_corners=True)
        feat = feat + x1
        x2 = self.pool2(x)
        x2 = self.resize_bilinear(x2, size=(height, width), align_corners=True)
        feat = feat + x2
        x3 = self.pool3(x)
        x3 = self.resize_bilinear(x3, size=(height, width), align_corners=True)
        feat = feat + x3
        x6 = self.pool6(x)
        x6 = self.resize_bilinear(x6, size=(height, width), align_corners=True)
        feat = feat + x6
        return feat
class _ICHead(nn.Cell):
    """Head: fuse the three branch features and produce the class maps.

    Args:
        nclass: Number of output classes.
        norm_layer: Normalisation layer class passed to the fusion units.
        **kwargs: Forwarded to both cascade feature fusion units.
    """

    def __init__(self, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super(_ICHead, self).__init__()
        self.cff_12 = CascadeFeatureFusion12(128, 64, 128, nclass, norm_layer, **kwargs)
        self.cff_24 = CascadeFeatureFusion24(2048, 512, 128, nclass, norm_layer, **kwargs)
        self.conv_cls = nn.Conv2d(128, nclass, 1, has_bias=False)
        self.resize_bilinear = nn.ResizeBilinear()

    def construct(self, x_sub1, x_sub2, x_sub4):
        """Return ``[up_x8, up_x2, x_12_cls, x_24_cls]``."""
        x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2)
        x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1)
        h1, w1 = x_cff_12.shape[2:]
        up_x2 = self.resize_bilinear(x_cff_12, size=(h1 * 2, w1 * 2),
                                     align_corners=True)
        up_x2 = self.conv_cls(up_x2)
        h2, w2 = up_x2.shape[2:]
        up_x8 = self.resize_bilinear(up_x2, size=(h2 * 4, w2 * 4),
                                     align_corners=True)  # scale_factor=4,
        # Build a fresh list on every call: appending to a list stored on the
        # instance would keep results from earlier calls at the front.
        return [up_x8, up_x2, x_12_cls, x_24_cls]
class _ConvBNReLU(nn.Cell):
    """Convolution followed by a normalisation layer and ReLU.

    Args:
        in_channels: Input channel count.
        out_channels: Output channel count.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Explicit padding (``pad_mode='pad'``).
        dilation: Convolution dilation.
        groups: Passed to ``nn.Conv2d`` as ``group``.
        norm_layer: Normalisation layer class, built with ``momentum=0.1``.
        bias: Passed to ``nn.Conv2d`` as ``has_bias``.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=1, dilation=1,
                 groups=1, norm_layer=nn.SyncBatchNorm, bias=False, **kwargs):
        super(_ConvBNReLU, self).__init__()
        # ``groups`` and ``bias`` are forwarded instead of being hard-coded;
        # their defaults reproduce the previous fixed values.
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad_mode='pad', padding=padding,
                              dilation=dilation,
                              group=groups, has_bias=bias)
        self.bn = norm_layer(out_channels, momentum=0.1)
        self.relu = nn.ReLU()

    def construct(self, x):
        """Apply conv, normalisation and ReLU in that order."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
class CascadeFeatureFusion12(nn.Cell):
    """CFF unit: fuse a low-resolution feature map into a higher-resolution one.

    Args:
        low_channels: Channels of the low-resolution input.
        high_channels: Channels of the high-resolution input.
        out_channels: Channels of the fused output.
        nclass: Channels of the auxiliary class map.
        norm_layer: Normalisation layer class, built with ``momentum=0.1``.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super(CascadeFeatureFusion12, self).__init__()
        self.conv_low = nn.SequentialCell(
            nn.Conv2d(low_channels, out_channels, 3, pad_mode='pad', padding=2, dilation=2, has_bias=False),
            norm_layer(out_channels, momentum=0.1)
        )
        self.conv_high = nn.SequentialCell(
            nn.Conv2d(high_channels, out_channels, kernel_size=1, has_bias=False),
            norm_layer(out_channels, momentum=0.1)
        )
        self.conv_low_cls = nn.Conv2d(in_channels=out_channels, out_channels=nclass, kernel_size=1, has_bias=False)
        self.resize_bilinear = nn.ResizeBilinear()
        self.relu = ms.nn.ReLU()

    def construct(self, x_low, x_high):
        """Return ``(fused, x_low_cls)``.

        ``x_low`` is resized to the spatial size of ``x_high`` before the
        two projected maps are added; the class map comes from the
        projected low-resolution branch.
        """
        h, w = x_high.shape[2:]
        x_low = self.resize_bilinear(x_low, size=(h, w), align_corners=True)
        x_low = self.conv_low(x_low)
        x_high = self.conv_high(x_high)
        x = x_low + x_high
        x = self.relu(x)
        x_low_cls = self.conv_low_cls(x_low)
        return x, x_low_cls
class CascadeFeatureFusion24(nn.Cell):
    """CFF unit: fuse a low-resolution feature map into a higher-resolution one.

    Args:
        low_channels: Channels of the low-resolution input.
        high_channels: Channels of the high-resolution input.
        out_channels: Channels of the fused output.
        nclass: Channels of the auxiliary class map.
        norm_layer: Normalisation layer class, built with ``momentum=0.1``.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, low_channels, high_channels, out_channels, nclass, norm_layer=nn.SyncBatchNorm, **kwargs):
        super(CascadeFeatureFusion24, self).__init__()
        self.conv_low = nn.SequentialCell(
            nn.Conv2d(low_channels, out_channels, 3, pad_mode='pad', padding=2, dilation=2, has_bias=False),
            norm_layer(out_channels, momentum=0.1)
        )
        self.conv_high = nn.SequentialCell(
            nn.Conv2d(high_channels, out_channels, kernel_size=1, has_bias=False),
            norm_layer(out_channels, momentum=0.1)
        )
        self.conv_low_cls = nn.Conv2d(in_channels=out_channels, out_channels=nclass, kernel_size=1, has_bias=False)
        self.resize_bilinear = nn.ResizeBilinear()
        self.relu = ms.nn.ReLU()

    def construct(self, x_low, x_high):
        """Return ``(fused, x_low_cls)``.

        ``x_low`` is resized to the spatial size of ``x_high`` before the
        two projected maps are added; the class map comes from the
        projected low-resolution branch.
        """
        h, w = x_high.shape[2:]
        x_low = self.resize_bilinear(x_low, size=(h, w), align_corners=True)
        x_low = self.conv_low(x_low)
        x_high = self.conv_high(x_high)
        x = x_low + x_high
        x = self.relu(x)
        x_low_cls = self.conv_low_cls(x_low)
        return x, x_low_cls
class SegBaseModel(nn.Cell):
    """Base Model for Semantic Segmentation.

    Args:
        nclass: Number of classes (stored only).
        backbone: Backbone name; only ``'resnet50'`` is supported.
        root: Checkpoint location forwarded to ``get_resnet50v1b``.
        istraining: Forwarded to ``get_resnet50v1b``.

    Raises:
        ValueError: If ``backbone`` is not supported.
    """

    def __init__(self, nclass=19, backbone='resnet50', root="", istraining=False):
        super(SegBaseModel, self).__init__()
        self.nclass = nclass
        if backbone != 'resnet50':
            # Fail here instead of later with a missing ``pretrained`` attribute.
            raise ValueError("unsupported backbone {!r}; expected 'resnet50'".format(backbone))
        self.pretrained = get_resnet50v1b(ckpt_root=root, istraining=istraining)

    def construct(self, x):
        """Run the backbone stem and return the outputs of its four stages."""
        x = self.pretrained.conv1(x)
        x = self.pretrained.bn1(x)
        x = self.pretrained.relu(x)
        x = self.pretrained.maxpool(x)
        c1 = self.pretrained.layer1(x)
        c2 = self.pretrained.layer2(c1)
        c3 = self.pretrained.layer3(c2)
        c4 = self.pretrained.layer4(c3)
        return c1, c2, c3, c4
复制代码
def poly_lr(base_lr, decay_steps, total_steps, end_lr=0.0001, power=0.9):
    """Yield a polynomially decaying learning rate for each training step.

    Step ``i`` yields
    ``(base_lr - end_lr) * (1 - min(i, decay_steps) / decay_steps) ** power + end_lr``.

    Args:
        base_lr: Learning rate at step 0.
        decay_steps: Number of steps over which the rate decays. A value of
            zero or less is treated as already fully decayed, so every step
            yields ``end_lr``.
        total_steps: Number of values to yield.
        end_lr: Learning rate once the decay has finished.
        power: Exponent of the polynomial.

    Yields:
        One learning rate per step, ``total_steps`` values in total.
    """
    for i in range(total_steps):
        if decay_steps > 0:
            progress = min(i, decay_steps) / decay_steps
        else:
            # Avoids dividing by zero; matches the result for negative values.
            progress = 1.0
        yield (base_lr - end_lr) * ((1.0 - progress) ** power) + end_lr
复制代码
# Optimizer, checkpoint and device settings used by the training cell.
optimizer_init_lr = 0.02
optimizer_weight_decay = 0.0001
optimizer_momentum = 0.9
train_save_checkpoint_epochs = 5
train_keep_checkpoint_max = 10
rank_id = 0
device_id = 0
device_num = 1
- # from src.lr_scheduler import poly_lr
- import os
- import sys
- import logging
- import argparse
- # import yaml
- import mindspore.nn as nn
- from mindspore import Model
- from mindspore import context
- from mindspore import set_seed
- from mindspore.context import ParallelMode
- from mindspore.communication import init
- from mindspore.train.callback import CheckpointConfig
- from mindspore.train.callback import ModelCheckpoint
- from mindspore.train.callback import LossMonitor
- from mindspore.train.callback import TimeMonitor
# Learning-rate schedule: one value per training step, decaying to 0.
iters_per_epoch = train_data_size
total_train_steps = iters_per_epoch * epoch
base_lr = optimizer_init_lr
iter_lr = poly_lr(base_lr, total_train_steps, total_train_steps, end_lr=0.0, power=0.9)
network = ICNetdc(pretrained_path=train_pretrained_model_path, norm_layer=nn.BatchNorm2d)
optim = nn.SGD(params=network.trainable_params(), learning_rate=iter_lr, momentum=optimizer_momentum,
               weight_decay=optimizer_weight_decay)
model = Model(network, optimizer=optim, metrics=None)
# Save a checkpoint every ``train_save_checkpoint_epochs`` epochs.
config_ck_train = CheckpointConfig(save_checkpoint_steps=iters_per_epoch * train_save_checkpoint_epochs,
                                   keep_checkpoint_max=train_keep_checkpoint_max)
ckpoint_cb_train = ModelCheckpoint(prefix='ICNet',
                                   directory=os.path.join(project_path, 'ckpt' + str(device_id)),
                                   config=config_ck_train)
time_cb_train = TimeMonitor(data_size=dataset.get_dataset_size())
loss_cb_train = LossMonitor()
print("train begins------------------------------")
model.train(epoch=epoch, train_dataset=dataset, callbacks=[ckpoint_cb_train, loss_cb_train, time_cb_train],
            dataset_sink_mode=True)
复制代码 3.模型验证
- import os
- import time
- import sys
- import argparse
- import yaml
- import numpy as np
- from PIL import Image
- import mindspore.ops as ops
- from mindspore import load_param_into_net
- from mindspore import load_checkpoint
- from mindspore import Tensor
- import mindspore.dataset.vision as vision
- from src.models import ICNet
- from src.metric import SegmentationMetric
- from src.logger import SetupLogger
class Evaluator:
    """Evaluate a trained ICNet checkpoint on the validation split.

    Args:
        checkpoint_path: Checkpoint file loaded into the network. Defaults
            to the path produced by the training cell of this tutorial.
    """

    # Lookup table from label id + 1 to training class index (-1 = ignored).
    _key = np.array([-1, -1, -1, -1, -1, -1,
                     -1, -1, 0, 1, -1, -1,
                     2, 3, 4, -1, -1, -1,
                     5, -1, 6, 7, 8, 9,
                     10, 11, 12, 13, 14, 15,
                     -1, -1, 16, 17, 18])
    # Accepted label ids: [-1, ..., 33]
    _mapping = np.arange(-1, len(_key) - 1, dtype='int32')

    def __init__(self, checkpoint_path="/home/ma-user/work/ICNet/ckpt0/ICNet-10_1.ckpt"):
        # get valid dataset images and targets
        self.image_paths, self.mask_paths = _get_city_pairs(dataset_path, "val")
        # create network
        self.model = ICNet(nclass=19, pretrained_path=train_pretrained_model_path, istraining=False)
        # load ckpt
        param_dict = load_checkpoint(checkpoint_path)
        load_param_into_net(self.model, param_dict)
        # evaluation metrics
        self.metric = SegmentationMetric(19)

    def eval(self):
        """Run the model on every validation image and print mIoU, pixel accuracy and mean time."""
        self.metric.reset()
        model = self.model
        model = model.set_train(False)
        logger.info("Start validation, Total sample: {:d}".format(len(self.image_paths)))
        if not self.image_paths:
            # Nothing to average; avoids a division by zero below.
            logger.info("No validation samples found, skip evaluation")
            return
        expand_dims = ops.ExpandDims()
        list_time = []
        for image_path, mask_path in zip(self.image_paths, self.mask_paths):
            image = Image.open(image_path).convert('RGB')
            mask = Image.open(mask_path)
            image = self._img_transform(image)
            mask = self._mask_transform(mask)
            # Add the batch axis in front.
            image = expand_dims(Tensor(image), 0)
            # Only the forward pass is timed.
            start_time = time.time()
            output = model(image)
            end_time = time.time()
            step_time = end_time - start_time
            output = output.asnumpy()
            mask = np.expand_dims(mask.asnumpy(), axis=0)
            self.metric.update(output, mask)
            list_time.append(step_time)
        mIoU, pixAcc = self.metric.get()
        average_time = sum(list_time) / len(list_time)
        print("avgmiou", mIoU)
        print("avg_pixacc", pixAcc)
        print("avgtime", average_time)

    def _img_transform(self, image):
        """Apply ``ToTensor`` and then ``Normalize`` to ``image``."""
        to_tensor = vision.ToTensor()
        normalize = vision.Normalize([.485, .456, .406], [.229, .224, .225], is_hwc=False)
        image = to_tensor(image)
        image = normalize(image)
        return image

    def _mask_transform(self, mask):
        """Convert a label mask to an int32 ``Tensor`` of training class indices."""
        mask = self._class_to_index(np.array(mask).astype('int32'))
        return Tensor(np.array(mask).astype('int32'))

    def _class_to_index(self, mask):
        """Map label ids in ``mask`` (-1..33) to training class indices (-1..18).

        Raises:
            ValueError: If ``mask`` contains a value outside -1..33.
        """
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        unexpected = np.setdiff1d(np.unique(mask), self._mapping)
        if unexpected.size:
            raise ValueError('unexpected label ids in mask: {}'.format(unexpected.tolist()))
        # Get the index of each pixel value in the mask corresponding to _mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        # Look the positions up in _key and restore the original shape
        return self._key[index].reshape(mask.shape)
def _get_city_pairs(folder, split='train'):
    """Return two parallel lists of image paths and label-mask paths.

    Args:
        folder: Dataset root containing ``leftImg8bit/<split>`` and
            ``gtFine/<split>`` sub-directories.
        split: One of ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        ``(img_paths, mask_paths)``; entry ``i`` of both lists refers to the
        same sample. Images without a matching mask file are reported on
        stdout and skipped.

    Raises:
        ValueError: If ``split`` is not a supported value.
    """
    def get_path_pairs(image_folder, mask_folder):
        img_paths = []
        mask_paths = []
        for root, dirs, files in os.walk(image_folder):
            # Sort in place so the traversal order is reproducible.
            dirs.sort()
            for filename in sorted(files):
                if not filename.endswith('.png'):
                    continue
                imgpath = os.path.join(root, filename)
                # The mask lives in a folder with the same name as the
                # image's parent folder.
                foldername = os.path.basename(os.path.dirname(imgpath))
                maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                maskpath = os.path.join(mask_folder, foldername, maskname)
                if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                    img_paths.append(imgpath)
                    mask_paths.append(maskpath)
                else:
                    print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(img_paths), image_folder))
        return img_paths, mask_paths

    if split not in ('train', 'val', 'test'):
        raise ValueError("unsupported split {!r}; expected 'train', 'val' or 'test'".format(split))
    # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val"
    img_folder = os.path.join(folder, 'leftImg8bit/' + split)
    # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val"
    mask_folder = os.path.join(folder, 'gtFine/' + split)
    img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
    return img_paths, mask_paths
# Set up the evaluation log file and run the evaluation.
train_ckpt_dir = "./ckpt/"
model_name = "icnet"
model_backbone = "resnet50v1"
checkpoint_path = "./ckpt0/ICNet-10_1.ckpt"
logger = SetupLogger(name="semantic_segmentation",
                     save_dir=train_ckpt_dir,
                     distributed_rank=0,
                     filename='{}_{}_evaluate_log.txt'.format(model_name, model_backbone))
evaluator = Evaluator()
evaluator.eval()
复制代码 最后根据路径的图片获取语义分割文本
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |