3 lat temu · ebceda8419
--- a/docs/apis/train.md
+++ b/docs/apis/train.md
@@ -34,7 +34,7 @@
 
															 ### 初始化`BaseSegmenter`子类对象
														
 
															-- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数，分别表示输入通道数、输出类别数以及是否使用预置的混合损失。部分模型如`FarSeg`暂不支持对`in_channels`参数的设置。
														
 
															+- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数，分别表示输入通道数、输出类别数以及是否使用预置的混合损失。
														
 
															 - `use_mixed_loss`参数将在未来被弃用，因此不建议使用。
														
 
															 - 可通过`losses`参数指定模型训练时使用的损失函数。`losses`需为一个字典，其中`'types'`键和`'coef'`键对应的值为两个等长的列表，分别表示损失函数对象（一个可调用对象）和损失函数的权重。例如：`losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}`在训练过程中将等价于计算如下损失函数：`1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`，其中`logits`和`labels`分别是模型输出和真值标签。
														
 
															 - 不同的子类支持与模型相关的输入参数，详情请参考[模型定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg)和[训练器定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmentor.py)。
														
--- a/docs/intro/model_zoo.md
+++ b/docs/intro/model_zoo.md
@@ -20,18 +20,21 @@ PaddleRS目前已支持的全部模型如下（标注\*的为遥感专用模型
 
															 | 变化检测 | \*FCCDN | 是 |
														
 
															 | 变化检测 | \*SNUNet | 是 |
														
 
															 | 变化检测 | \*STANet | 是 |
														
 
															-| 场景分类 | CondenseNetV2 | 是 |
														
 
															-| 场景分类 | HRNet | 是 |
														
 
															-| 场景分类 | MobileNetV3 | 是 |
														
 
															-| 场景分类 | ResNet50-vd | 是 |
														
 
															+| 场景分类 | CondenseNet V2 | 是 |
														
 
															+| 场景分类 | HRNet | 否 |
														
 
															+| 场景分类 | MobileNetV3 | 否 |
														
 
															+| 场景分类 | ResNet50-vd | 否 |
														
 
															 | 图像复原 | DRN | 否 |
														
 
															-| 图像复原 | ESRGAN | 否 |
														
 
															+| 图像复原 | ESRGAN | 是 |
														
 
															 | 图像复原 | LESRCNN | 否 |
														
 
															-| 目标检测 | Faster R-CNN | 是 |
														
 
															-| 目标检测 | PP-YOLO | 是 |
														
 
															-| 目标检测 | PP-YOLO Tiny | 是 |
														
 
															-| 目标检测 | PP-YOLOv2 | 是 |
														
 
															-| 目标检测 | YOLOv3 | 是 |
														
 
															+| 目标检测 | Faster R-CNN | 否 |
														
 
															+| 目标检测 | PP-YOLO | 否 |
														
 
															+| 目标检测 | PP-YOLO Tiny | 否 |
														
 
															+| 目标检测 | PP-YOLOv2 | 否 |
														
 
															+| 目标检测 | YOLOv3 | 否 |
														
 
															+| 图像分割 | BiSeNet V2 | 是 |
														
 
															 | 图像分割 | DeepLab V3+ | 是 |
														
 
															-| 图像分割 | \*FarSeg | 否 |
														
 
															+| 图像分割 | \*FarSeg | 是 |
														
 
															+| 图像分割 | Fast-SCNN | 是 |
														
 
															+| 图像分割 | HRNet | 是 |
														
 
															 | 图像分割 | UNet | 是 |
														
--- a/examples/README.md
+++ b/examples/README.md
@@ -53,3 +53,4 @@ PaddleRS提供从科学研究到产业应用的丰富示例，希望帮助遥感
 
															 |[【官方】第十一届 “中国软件杯”百度遥感赛项：目标检测功能](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|竞赛打榜|目标检测，比赛基线|
														
 
															 |[【十一届软件杯】遥感解译赛道：变化检测任务——预赛第四名方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|竞赛打榜|变化检测，高分方案|
														
 
															 |[【方案分享】第十一届 “中国软件杯”大学生软件设计大赛遥感解译赛道 比赛方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|竞赛打榜|变化检测，高分方案|
														
 
															+|[遥感变化检测助力信贷场景下工程进度管控](https://aistudio.baidu.com/aistudio/projectdetail/4543160)|古代飞|产业范例|变化检测，金融风控|
														
--- a/examples/rs_research/config_utils.py
+++ b/examples/rs_research/config_utils.py
@@ -133,6 +133,7 @@ def parse_args(*args, **kwargs):
 
															     # Global settings
														
 
															     parser.add_argument('cmd', choices=['train', 'eval'])
														
 
															     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
														
 
															+    parser.add_argument('--seed', type=int, default=None)
														
 
															     # Data
														
 
															     parser.add_argument('--datasets', type=dict, default={})
														
--- a/examples/rs_research/run_task.py
+++ b/examples/rs_research/run_task.py
@@ -15,7 +15,9 @@
 
															 # limitations under the License.
														
 
															 import os
														
 
															+import random
														
 
															+import numpy as np
														
 
															 # Import cv2 and sklearn before paddlers to solve the
														
 
															 # "ImportError: dlopen: cannot load any more object with static TLS" issue.
														
 
															 import cv2
														
@@ -62,6 +64,11 @@ if __name__ == '__main__':
 
															     cfg = parse_args()
														
 
															     print(format_cfg(cfg))
														
 
															+    if cfg['seed'] is not None:
														
 
															+        random.seed(cfg['seed'])
														
 
															+        np.random.seed(cfg['seed'])
														
 
															+        paddle.seed(cfg['seed'])
														
 
															+
														
 
															     # Automatically download data
														
 
															     if cfg['download_on']:
														
 
															         paddlers.utils.download_and_decompress(
														
--- a/paddlers/deploy/predictor.py
+++ b/paddlers/deploy/predictor.py
@@ -103,11 +103,11 @@ class Predictor(object):
 
															             config.enable_use_gpu(200, gpu_id)
														
 
															             config.switch_ir_optim(True)
														
 
															             if use_trt:
														
 
															-                if self._model.model_type == 'segmenter':
														
 
															+                if self.model_type == 'segmenter':
														
 
															                     logging.warning(
														
 
															                         "Semantic segmentation models do not support TensorRT acceleration, "
														
 
															                         "TensorRT is forcibly disabled.")
														
 
															-                elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
														
 
															+                elif self.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
														
 
															                     logging.warning(
														
 
															                         "RCNN models do not support TensorRT acceleration, "
														
 
															                         "TensorRT is forcibly disabled.")
														
@@ -150,30 +150,29 @@ class Predictor(object):
 
															     def preprocess(self, images, transforms):
														
 
															         preprocessed_samples = self._model.preprocess(
														
 
															             images, transforms, to_tensor=False)
														
 
															-        if self._model.model_type == 'classifier':
														
 
															+        if self.model_type == 'classifier':
														
 
															             preprocessed_samples = {'image': preprocessed_samples[0]}
														
 
															-        elif self._model.model_type == 'segmenter':
														
 
															+        elif self.model_type == 'segmenter':
														
 
															             preprocessed_samples = {
														
 
															                 'image': preprocessed_samples[0],
														
 
															                 'ori_shape': preprocessed_samples[1]
														
 
															             }
														
 
															-        elif self._model.model_type == 'detector':
														
 
															+        elif self.model_type == 'detector':
														
 
															             pass
														
 
															-        elif self._model.model_type == 'change_detector':
														
 
															+        elif self.model_type == 'change_detector':
														
 
															             preprocessed_samples = {
														
 
															                 'image': preprocessed_samples[0],
														
 
															                 'image2': preprocessed_samples[1],
														
 
															                 'ori_shape': preprocessed_samples[2]
														
 
															             }
														
 
															-        elif self._model.model_type == 'restorer':
														
 
															+        elif self.model_type == 'restorer':
														
 
															             preprocessed_samples = {
														
 
															                 'image': preprocessed_samples[0],
														
 
															                 'tar_shape': preprocessed_samples[1]
														
 
															             }
														
 
															         else:
														
 
															             logging.error(
														
 
															-                "Invalid model type {}".format(self._model.model_type),
														
 
															-                exit=True)
														
 
															+                "Invalid model type {}".format(self.model_type), exit=True)
														
 
															         return preprocessed_samples
														
 
															     def postprocess(self,
														
@@ -182,7 +181,7 @@ class Predictor(object):
 
															                     ori_shape=None,
														
 
															                     tar_shape=None,
														
 
															                     transforms=None):
														
 
															-        if self._model.model_type == 'classifier':
														
 
															+        if self.model_type == 'classifier':
														
 
															             true_topk = min(self._model.num_classes, topk)
														
 
															             if self._model.postprocess is None:
														
 
															                 self._model.build_postprocess_from_labels(topk)
														
@@ -198,7 +197,7 @@ class Predictor(object):
 
															                 'scores_map': s,
														
 
															                 'label_names_map': n,
														
 
															             } for l, s, n in zip(class_ids, scores, label_names)]
														
 
															-        elif self._model.model_type in ('segmenter', 'change_detector'):
														
 
															+        elif self.model_type in ('segmenter', 'change_detector'):
														
 
															             label_map, score_map = self._model.postprocess(
														
 
															                 net_outputs,
														
 
															                 batch_origin_shape=ori_shape,
														
@@ -207,13 +206,13 @@ class Predictor(object):
 
															                 'label_map': l,
														
 
															                 'score_map': s
														
 
															             } for l, s in zip(label_map, score_map)]
														
 
															-        elif self._model.model_type == 'detector':
														
 
															+        elif self.model_type == 'detector':
														
 
															             net_outputs = {
														
 
															                 k: v
														
 
															                 for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
														
 
															             }
														
 
															             preds = self._model.postprocess(net_outputs)
														
 
															-        elif self._model.model_type == 'restorer':
														
 
															+        elif self.model_type == 'restorer':
														
 
															             res_maps = self._model.postprocess(
														
 
															                 net_outputs[0],
														
 
															                 batch_tar_shape=tar_shape,
														
@@ -221,8 +220,7 @@ class Predictor(object):
 
															             preds = [{'res_map': res_map} for res_map in res_maps]
														
 
															         else:
														
 
															             logging.error(
														
 
															-                "Invalid model type {}.".format(self._model.model_type),
														
 
															-                exit=True)
														
 
															+                "Invalid model type {}.".format(self.model_type), exit=True)
														
 
															         return preds
														
@@ -360,6 +358,12 @@ class Predictor(object):
 
															             batch_size (int, optional): Batch size used in inference. Defaults to 1.
														
 
															             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
														
 
															         """
														
 
															+
														
 
															+        if self.model_type not in ('segmenter', 'change_detector'):
														
 
															+            raise RuntimeError(
														
 
															+                "Model type is {}, which does not support inference with sliding windows.".
														
 
															+                format(self.model_type))
														
 
															+
														
 
															         slider_predict(
														
 
															             partial(
														
 
															                 self.predict, quiet=True),
														
@@ -375,3 +379,7 @@ class Predictor(object):
 
															     def batch_predict(self, image_list, **params):
														
 
															         return self.predict(img_file=image_list, **params)
														
 
															+
														
 
															+    @property
														
 
															+    def model_type(self):
														
 
															+        return self._model.model_type
														
--- a/paddlers/models/hash.txt
+++ b/paddlers/models/hash.txt
@@ -0,0 +1 @@
 
															+ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef
														
--- a/paddlers/models/ppseg/__init__.py
+++ b/paddlers/models/ppseg/__init__.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/core/infer.py
+++ b/paddlers/models/ppseg/core/infer.py
@@ -21,88 +21,16 @@ import paddle
 
															 import paddle.nn.functional as F
														
 
															-def get_reverse_list(ori_shape, transforms):
														
 
															-    """
														
 
															-    get reverse list of transform.
														
 
															-
														
 
															-    Args:
														
 
															-        ori_shape (list): Origin shape of image.
														
 
															-        transforms (list): List of transform.
														
 
															-
														
 
															-    Returns:
														
 
															-        list: List of tuple, there are two format:
														
 
															-            ('resize', (h, w)) The image shape before resize,
														
 
															-            ('padding', (h, w)) The image shape before padding.
														
 
															-    """
														
 
															-    reverse_list = []
														
 
															-    h, w = ori_shape[0], ori_shape[1]
														
 
															-    for op in transforms:
														
 
															-        if op.__class__.__name__ in ['Resize']:
														
 
															-            reverse_list.append(('resize', (h, w)))
														
 
															-            h, w = op.target_size[0], op.target_size[1]
														
 
															-        if op.__class__.__name__ in ['ResizeByLong']:
														
 
															-            reverse_list.append(('resize', (h, w)))
														
 
															-            long_edge = max(h, w)
														
 
															-            short_edge = min(h, w)
														
 
															-            short_edge = int(round(short_edge * op.long_size / long_edge))
														
 
															-            long_edge = op.long_size
														
 
															-            if h > w:
														
 
															-                h = long_edge
														
 
															-                w = short_edge
														
 
															-            else:
														
 
															-                w = long_edge
														
 
															-                h = short_edge
														
 
															-        if op.__class__.__name__ in ['ResizeByShort']:
														
 
															-            reverse_list.append(('resize', (h, w)))
														
 
															-            long_edge = max(h, w)
														
 
															-            short_edge = min(h, w)
														
 
															-            long_edge = int(round(long_edge * op.short_size / short_edge))
														
 
															-            short_edge = op.short_size
														
 
															-            if h > w:
														
 
															-                h = long_edge
														
 
															-                w = short_edge
														
 
															-            else:
														
 
															-                w = long_edge
														
 
															-                h = short_edge
														
 
															-        if op.__class__.__name__ in ['Pad']:
														
 
															-            reverse_list.append(('padding', (h, w)))
														
 
															-            w, h = op.target_size[0], op.target_size[1]
														
 
															-        if op.__class__.__name__ in ['PadByAspectRatio']:
														
 
															-            reverse_list.append(('padding', (h, w)))
														
 
															-            ratio = w / h
														
 
															-            if ratio == op.aspect_ratio:
														
 
															-                pass
														
 
															-            elif ratio > op.aspect_ratio:
														
 
															-                h = int(w / op.aspect_ratio)
														
 
															-            else:
														
 
															-                w = int(h * op.aspect_ratio)
														
 
															-        if op.__class__.__name__ in ['LimitLong']:
														
 
															-            long_edge = max(h, w)
														
 
															-            short_edge = min(h, w)
														
 
															-            if ((op.max_long is not None) and (long_edge > op.max_long)):
														
 
															-                reverse_list.append(('resize', (h, w)))
														
 
															-                long_edge = op.max_long
														
 
															-                short_edge = int(round(short_edge * op.max_long / long_edge))
														
 
															-            elif ((op.min_long is not None) and (long_edge < op.min_long)):
														
 
															-                reverse_list.append(('resize', (h, w)))
														
 
															-                long_edge = op.min_long
														
 
															-                short_edge = int(round(short_edge * op.min_long / long_edge))
														
 
															-            if h > w:
														
 
															-                h = long_edge
														
 
															-                w = short_edge
														
 
															-            else:
														
 
															-                w = long_edge
														
 
															-                h = short_edge
														
 
															-    return reverse_list
														
 
															-
														
 
															-
														
 
															-def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
														
 
															+def reverse_transform(pred, trans_info, mode='nearest'):
														
 
															     """recover pred to origin shape"""
														
 
															-    reverse_list = get_reverse_list(ori_shape, transforms)
														
 
															     intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
														
 
															     dtype = pred.dtype
														
 
															-    for item in reverse_list[::-1]:
														
 
															-        if item[0] == 'resize':
														
 
															+    for item in trans_info[::-1]:
														
 
															+        if isinstance(item[0], list):
														
 
															+            trans_mode = item[0][0]
														
 
															+        else:
														
 
															+            trans_mode = item[0]
														
 
															+        if trans_mode == 'resize':
														
 
															             h, w = item[1][0], item[1][1]
														
 
															             if paddle.get_device() == 'cpu' and dtype in intTypeList:
														
 
															                 pred = paddle.cast(pred, 'float32')
														
@@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
 
															                 pred = paddle.cast(pred, dtype)
														
 
															             else:
														
 
															                 pred = F.interpolate(pred, (h, w), mode=mode)
														
 
															-        elif item[0] == 'padding':
														
 
															+        elif trans_mode == 'padding':
														
 
															             h, w = item[1][0], item[1][1]
														
 
															             pred = pred[:, :, 0:h, 0:w]
														
 
															         else:
														
@@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
 
															 def inference(model,
														
 
															               im,
														
 
															-              ori_shape=None,
														
 
															-              transforms=None,
														
 
															+              trans_info=None,
														
 
															               is_slide=False,
														
 
															               stride=None,
														
 
															               crop_size=None):
														
@@ -216,8 +143,7 @@ def inference(model,
 
															     Args:
														
 
															         model (paddle.nn.Layer): model to get logits of image.
														
 
															         im (Tensor): the input image.
														
 
															-        ori_shape (list): Origin shape of image.
														
 
															-        transforms (list): Transforms for image.
														
 
															+        trans_info (list): Records of how the transforms changed the image shape. Default: None.
														
 
															         is_slide (bool): Whether to infer by sliding window. Default: False.
														
 
															         crop_size (tuple|list): The size of sliding window, (w, h). It should be provided if is_slide is True.
														
 
															         stride (tuple|list): The size of stride, (w, h). It should be provided if is_slide is True.
														
@@ -239,8 +165,8 @@ def inference(model,
 
															         logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
														
 
															     if hasattr(model, 'data_format') and model.data_format == 'NHWC':
														
 
															         logit = logit.transpose((0, 3, 1, 2))
														
 
															-    if ori_shape is not None:
														
 
															-        logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
														
 
															+    if trans_info is not None:
														
 
															+        logit = reverse_transform(logit, trans_info, mode='bilinear')
														
 
															         pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
														
 
															         return pred, logit
														
 
															     else:
														
@@ -249,8 +175,7 @@ def inference(model,
 
															 def aug_inference(model,
														
 
															                   im,
														
 
															-                  ori_shape,
														
 
															-                  transforms,
														
 
															+                  trans_info,
														
 
															                   scales=1.0,
														
 
															                   flip_horizontal=False,
														
 
															                   flip_vertical=False,
														
@@ -263,8 +188,7 @@ def aug_inference(model,
 
															     Args:
														
 
															         model (paddle.nn.Layer): model to get logits of image.
														
 
															         im (Tensor): the input image.
														
 
															-        ori_shape (list): Origin shape of image.
														
 
															-        transforms (list): Transforms for image.
														
 
															+        trans_info (list): Records of how the transforms changed the image shape, used to recover the prediction to the original shape.
														
 
															         scales (float|tuple|list):  Scales for resize. Default: 1.
														
 
															         flip_horizontal (bool): Whether to flip horizontally. Default: False.
														
 
															         flip_vertical (bool): Whether to flip vertically. Default: False.
														
@@ -302,8 +226,7 @@ def aug_inference(model,
 
															             logit = F.softmax(logit, axis=1)
														
 
															             final_logit = final_logit + logit
														
 
															-    final_logit = reverse_transform(
														
 
															-        final_logit, ori_shape, transforms, mode='bilinear')
														
 
															+    final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
														
 
															     pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
														
 
															     return pred, final_logit
														
--- a/paddlers/models/ppseg/core/predict.py
+++ b/paddlers/models/ppseg/core/predict.py
@@ -36,6 +36,15 @@ def partition_list(arr, m):
 
															     return [arr[i:i + n] for i in range(0, len(arr), n)]
														
 
															+def preprocess(im_path, transforms):
														
 
															+    data = {}
														
 
															+    data['img'] = im_path
														
 
															+    data = transforms(data)
														
 
															+    data['img'] = data['img'][np.newaxis, ...]
														
 
															+    data['img'] = paddle.to_tensor(data['img'])
														
 
															+    return data
														
 
															+
														
 
															+
														
 
															 def predict(model,
														
 
															             model_path,
														
 
															             transforms,
														
@@ -89,18 +98,13 @@ def predict(model,
 
															     color_map = visualize.get_color_map_list(256, custom_color=custom_color)
														
 
															     with paddle.no_grad():
														
 
															         for i, im_path in enumerate(img_lists[local_rank]):
														
 
															-            im = cv2.imread(im_path)
														
 
															-            ori_shape = im.shape[:2]
														
 
															-            im, _ = transforms(im)
														
 
															-            im = im[np.newaxis, ...]
														
 
															-            im = paddle.to_tensor(im)
														
 
															+            data = preprocess(im_path, transforms)
														
 
															             if aug_pred:
														
 
															                 pred, _ = infer.aug_inference(
														
 
															                     model,
														
 
															-                    im,
														
 
															-                    ori_shape=ori_shape,
														
 
															-                    transforms=transforms.transforms,
														
 
															+                    data['img'],
														
 
															+                    trans_info=data['trans_info'],
														
 
															                     scales=scales,
														
 
															                     flip_horizontal=flip_horizontal,
														
 
															                     flip_vertical=flip_vertical,
														
@@ -110,9 +114,8 @@ def predict(model,
 
															             else:
														
 
															                 pred, _ = infer.inference(
														
 
															                     model,
														
 
															-                    im,
														
 
															-                    ori_shape=ori_shape,
														
 
															-                    transforms=transforms.transforms,
														
 
															+                    data['img'],
														
 
															+                    trans_info=data['trans_info'],
														
 
															                     is_slide=is_slide,
														
 
															                     stride=stride,
														
 
															                     crop_size=crop_size)
														
@@ -141,9 +144,4 @@ def predict(model,
 
															             mkdir(pred_saved_path)
														
 
															             pred_mask.save(pred_saved_path)
														
 
															-            # pred_im = utils.visualize(im_path, pred, weight=0.0)
														
 
															-            # pred_saved_path = os.path.join(pred_saved_dir, im_file)
														
 
															-            # mkdir(pred_saved_path)
														
 
															-            # cv2.imwrite(pred_saved_path, pred_im)
														
 
															-
														
 
															             progbar_pred.update(i + 1)
														
--- a/paddlers/models/ppseg/core/train.py
+++ b/paddlers/models/ppseg/core/train.py
@@ -35,17 +35,15 @@ def check_logits_losses(logits_list, losses):
 
															             .format(len_logits, len_losses))
														
 
															-def loss_computation(logits_list, labels, losses, edges=None):
														
 
															+def loss_computation(logits_list, labels, edges, losses):
														
 
															     check_logits_losses(logits_list, losses)
														
 
															     loss_list = []
														
 
															     for i in range(len(logits_list)):
														
 
															         logits = logits_list[i]
														
 
															         loss_i = losses['types'][i]
														
 
															         coef_i = losses['coef'][i]
														
 
															-
														
 
															-        if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
														
 
															-                                         ) and loss_i.edge_label:
														
 
															-            # If use edges as labels According to loss type.
														
 
															+        if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
														
 
															+            # Use edges as labels, according to the loss type.
														
 
															             loss_list.append(coef_i * loss_i(logits, edges))
														
 
															         elif loss_i.__class__.__name__ == 'MixedLoss':
														
 
															             mixed_loss_list = loss_i(logits, labels)
														
@@ -75,13 +73,14 @@ def train(model,
 
															           keep_checkpoint_max=5,
														
 
															           test_config=None,
														
 
															           precision='fp32',
														
 
															+          amp_level='O1',
														
 
															           profiler_options=None,
														
 
															           to_static_training=False):
														
 
															     """
														
 
															     Launch training.
														
 
															     Args:
														
 
															-        model（nn.Layer): A sementic segmentation model.
														
 
															+        model（nn.Layer): A semantic segmentation model.
														
 
															         train_dataset (paddle.io.Dataset): Used to read and process training datasets.
														
 
															         val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
														
 
															         optimizer (paddle.optimizer.Optimizer): The optimizer.
														
@@ -98,6 +97,9 @@ def train(model,
 
															         keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
														
 
															         test_config(dict, optional): Evaluation config.
														
 
															         precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
														
 
															+        amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2'. O1 represents mixed precision:
														
 
															+            the input data type of each operator is cast according to white_list and black_list. O2 represents pure fp16: all operator
														
 
															+            parameters and input data are cast to fp16, except for operators in black_list, operators without an fp16 kernel, and batchnorm. Default: 'O1'.
														
 
															         profiler_options (str, optional): The option of train profiler.
														
 
															         to_static_training (bool, optional): Whether to use @to_static for training.
														
 
															     """
														
@@ -112,7 +114,18 @@ def train(model,
 
															     if not os.path.isdir(save_dir):
														
 
															         if os.path.exists(save_dir):
														
 
															             os.remove(save_dir)
														
 
															-        os.makedirs(save_dir)
														
 
															+        os.makedirs(save_dir, exist_ok=True)
														
 
															+
														
 
															+    # use amp
														
 
															+    if precision == 'fp16':
														
 
															+        logger.info('use AMP to train. AMP level = {}'.format(amp_level))
														
 
															+        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
														
 
															+        if amp_level == 'O2':
														
 
															+            model, optimizer = paddle.amp.decorate(
														
 
															+                models=model,
														
 
															+                optimizers=optimizer,
														
 
															+                level='O2',
														
 
															+                save_dtype='float32')
														
 
															     if nranks > 1:
														
 
															         paddle.distributed.fleet.init(is_collective=True)
														
@@ -130,18 +143,13 @@ def train(model,
 
															         return_list=True,
														
 
															         worker_init_fn=worker_init_fn, )
														
 
															-    # use amp
														
 
															-    if precision == 'fp16':
														
 
															-        logger.info('use amp to train')
														
 
															-        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
														
 
															-
														
 
															     if use_vdl:
														
 
															         from visualdl import LogWriter
														
 
															         log_writer = LogWriter(save_dir)
														
 
															     if to_static_training:
														
 
															         model = paddle.jit.to_static(model)
														
 
															-        logger.info("Successfully to apply @to_static")
														
 
															+        logger.info("Successfully applied @to_static")
														
 
															     avg_loss = 0.0
														
 
															     avg_loss_list = []
														
@@ -164,30 +172,29 @@ def train(model,
 
															                 else:
														
 
															                     break
														
 
															             reader_cost_averager.record(time.time() - batch_start)
														
 
															-            images = data[0]
														
 
															-            labels = data[1].astype('int64')
														
 
															+            images = data['img']
														
 
															+            labels = data['label'].astype('int64')
														
 
															             edges = None
														
 
															-            if len(data) == 3:
														
 
															-                edges = data[2].astype('int64')
														
 
															+            if 'edge' in data.keys():
														
 
															+                edges = data['edge'].astype('int64')
														
 
															             if hasattr(model, 'data_format') and model.data_format == 'NHWC':
														
 
															                 images = images.transpose((0, 2, 3, 1))
														
 
															             if precision == 'fp16':
														
 
															                 with paddle.amp.auto_cast(
														
 
															+                        level=amp_level,
														
 
															                         enable=True,
														
 
															                         custom_white_list={
														
 
															                             "elementwise_add", "batch_norm", "sync_batch_norm"
														
 
															                         },
														
 
															                         custom_black_list={'bilinear_interp_v2'}):
														
 
															-                    if nranks > 1:
														
 
															-                        logits_list = ddp_model(images)
														
 
															-                    else:
														
 
															-                        logits_list = model(images)
														
 
															+                    logits_list = ddp_model(images) if nranks > 1 else model(
														
 
															+                        images)
														
 
															                     loss_list = loss_computation(
														
 
															                         logits_list=logits_list,
														
 
															                         labels=labels,
														
 
															-                        losses=losses,
														
 
															-                        edges=edges)
														
 
															+                        edges=edges,
														
 
															+                        losses=losses)
														
 
															                     loss = sum(loss_list)
														
 
															                 scaled = scaler.scale(loss)  # scale the loss
														
@@ -197,15 +204,12 @@ def train(model,
 
															                 else:
														
 
															                     scaler.minimize(optimizer, scaled)  # update parameters
														
 
															             else:
														
 
															-                if nranks > 1:
														
 
															-                    logits_list = ddp_model(images)
														
 
															-                else:
														
 
															-                    logits_list = model(images)
														
 
															+                logits_list = ddp_model(images) if nranks > 1 else model(images)
														
 
															                 loss_list = loss_computation(
														
 
															                     logits_list=logits_list,
														
 
															                     labels=labels,
														
 
															-                    losses=losses,
														
 
															-                    edges=edges)
														
 
															+                    edges=edges,
														
 
															+                    losses=losses)
														
 
															                 loss = sum(loss_list)
														
 
															                 loss.backward()
														
 
															                 # If the optimizer is ReduceOnPlateau, the loss is the one which has been passed into step.
														
@@ -278,7 +282,12 @@ def train(model,
 
															                     test_config = {}
														
 
															                 mean_iou, acc, _, _, _ = evaluate(
														
 
															-                    model, val_dataset, num_workers=num_workers, **test_config)
														
 
															+                    model,
														
 
															+                    val_dataset,
														
 
															+                    num_workers=num_workers,
														
 
															+                    precision=precision,
														
 
															+                    amp_level=amp_level,
														
 
															+                    **test_config)
														
 
															                 model.train()
														
@@ -314,7 +323,7 @@ def train(model,
 
															             batch_start = time.time()
														
 
															     # Calculate flops.
														
 
															-    if local_rank == 0:
														
 
															+    if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
														
 
															         _, c, h, w = images.shape
														
 
															         _ = paddle.flops(
														
 
															             model, [1, c, h, w],
														
--- a/paddlers/models/ppseg/core/val.py
+++ b/paddlers/models/ppseg/core/val.py
@@ -34,6 +34,8 @@ def evaluate(model,
 
															              is_slide=False,
														
 
															              stride=None,
														
 
															              crop_size=None,
														
 
															+             precision='fp32',
														
 
															+             amp_level='O1',
														
 
															              num_workers=0,
														
 
															              print_detail=True,
														
 
															              auc_roc=False):
														
@@ -41,7 +43,7 @@ def evaluate(model,
 
															     Launch evaluation.
														
 
															     Args:
														
 
															-        model（nn.Layer): A sementic segmentation model.
														
 
															+        model（nn.Layer): A semantic segmentation model.
														
 
															         eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
														
 
															         aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
														
 
															         scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
														
@@ -52,6 +54,8 @@ def evaluate(model,
 
															             It should be provided when `is_slide` is True.
														
 
															         crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
														
 
															             It should be provided when `is_slide` is True.
														
 
															+        precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
														
 
															+        amp_level (str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2'. O1 represents mixed precision: the input data type of each operator is cast according to white_list and black_list. O2 represents pure fp16: all operator parameters and input data are cast to fp16, except for operators in black_list, operators without an fp16 kernel, and batchnorm. Default: 'O1'.
														
 
															         num_workers (int, optional): Num workers for data loader. Default: 0.
														
 
															         print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
														
 
															         auc_roc (bool, optional): Whether to add the auc_roc metric. Default: False.
														
@@ -93,32 +97,66 @@ def evaluate(model,
 
															     batch_cost_averager = TimeAverager()
														
 
															     batch_start = time.time()
														
 
															     with paddle.no_grad():
														
 
															-        for iter, (im, label) in enumerate(loader):
														
 
															+        for iter, data in enumerate(loader):
														
 
															             reader_cost_averager.record(time.time() - batch_start)
														
 
															-            label = label.astype('int64')
														
 
															+            label = data['label'].astype('int64')
														
 
															-            ori_shape = label.shape[-2:]
														
 
															             if aug_eval:
														
 
															-                pred, logits = infer.aug_inference(
														
 
															-                    model,
														
 
															-                    im,
														
 
															-                    ori_shape=ori_shape,
														
 
															-                    transforms=eval_dataset.transforms.transforms,
														
 
															-                    scales=scales,
														
 
															-                    flip_horizontal=flip_horizontal,
														
 
															-                    flip_vertical=flip_vertical,
														
 
															-                    is_slide=is_slide,
														
 
															-                    stride=stride,
														
 
															-                    crop_size=crop_size)
														
 
															+                if precision == 'fp16':
														
 
															+                    with paddle.amp.auto_cast(
														
 
															+                            level=amp_level,
														
 
															+                            enable=True,
														
 
															+                            custom_white_list={
														
 
															+                                "elementwise_add", "batch_norm",
														
 
															+                                "sync_batch_norm"
														
 
															+                            },
														
 
															+                            custom_black_list={'bilinear_interp_v2'}):
														
 
															+                        pred, logits = infer.aug_inference(
														
 
															+                            model,
														
 
															+                            data['img'],
														
 
															+                            trans_info=data['trans_info'],
														
 
															+                            scales=scales,
														
 
															+                            flip_horizontal=flip_horizontal,
														
 
															+                            flip_vertical=flip_vertical,
														
 
															+                            is_slide=is_slide,
														
 
															+                            stride=stride,
														
 
															+                            crop_size=crop_size)
														
 
															+                else:
														
 
															+                    pred, logits = infer.aug_inference(
														
 
															+                        model,
														
 
															+                        data['img'],
														
 
															+                        trans_info=data['trans_info'],
														
 
															+                        scales=scales,
														
 
															+                        flip_horizontal=flip_horizontal,
														
 
															+                        flip_vertical=flip_vertical,
														
 
															+                        is_slide=is_slide,
														
 
															+                        stride=stride,
														
 
															+                        crop_size=crop_size)
														
 
															             else:
														
 
															-                pred, logits = infer.inference(
														
 
															-                    model,
														
 
															-                    im,
														
 
															-                    ori_shape=ori_shape,
														
 
															-                    transforms=eval_dataset.transforms.transforms,
														
 
															-                    is_slide=is_slide,
														
 
															-                    stride=stride,
														
 
															-                    crop_size=crop_size)
														
 
															+                if precision == 'fp16':
														
 
															+                    with paddle.amp.auto_cast(
														
 
															+                            level=amp_level,
														
 
															+                            enable=True,
														
 
															+                            custom_white_list={
														
 
															+                                "elementwise_add", "batch_norm",
														
 
															+                                "sync_batch_norm"
														
 
															+                            },
														
 
															+                            custom_black_list={'bilinear_interp_v2'}):
														
 
															+                        pred, logits = infer.inference(
														
 
															+                            model,
														
 
															+                            data['img'],
														
 
															+                            trans_info=data['trans_info'],
														
 
															+                            is_slide=is_slide,
														
 
															+                            stride=stride,
														
 
															+                            crop_size=crop_size)
														
 
															+                else:
														
 
															+                    pred, logits = infer.inference(
														
 
															+                        model,
														
 
															+                        data['img'],
														
 
															+                        trans_info=data['trans_info'],
														
 
															+                        is_slide=is_slide,
														
 
															+                        stride=stride,
														
 
															+                        crop_size=crop_size)
														
 
															             intersect_area, pred_area, label_area = metrics.calculate_area(
														
 
															                 pred,
														
@@ -175,12 +213,12 @@ def evaluate(model,
 
															             batch_cost_averager.reset()
														
 
															             batch_start = time.time()
														
 
															-    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
														
 
															-                                       label_area_all)
														
 
															-    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
														
 
															-    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
														
 
															-    class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
														
 
															-                                     label_area_all)
														
 
															+    metrics_input = (intersect_area_all, pred_area_all, label_area_all)
														
 
															+    class_iou, miou = metrics.mean_iou(*metrics_input)
														
 
															+    acc, class_precision, class_recall = metrics.class_measurement(
														
 
															+        *metrics_input)
														
 
															+    kappa = metrics.kappa(*metrics_input)
														
 
															+    class_dice, mdice = metrics.dice(*metrics_input)
														
 
															     if auc_roc:
														
 
															         auc_roc = metrics.auc_roc(
														
@@ -193,5 +231,7 @@ def evaluate(model,
 
															         infor = infor + auc_infor if auc_roc else infor
														
 
															         logger.info(infor)
														
 
															         logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
														
 
															-        logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
														
 
															-    return miou, acc, class_iou, class_acc, kappa
														
 
															+        logger.info("[EVAL] Class Precision: \n" + str(
														
 
															+            np.round(class_precision, 4)))
														
 
															+        logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
														
 
															+    return miou, acc, class_iou, class_precision, kappa
														
--- a/paddlers/models/ppseg/cvlibs/config.py
+++ b/paddlers/models/ppseg/cvlibs/config.py
@@ -15,9 +15,12 @@
 
															 import codecs
														
 
															 import os
														
 
															 from typing import Any, Dict, Generic
														
 
															+import warnings
														
 
															+from ast import literal_eval
														
 
															 import paddle
														
 
															 import yaml
														
 
															+import six
														
 
															 from paddlers.models.ppseg.cvlibs import manager
														
 
															 from paddlers.models.ppseg.utils import logger
														
@@ -69,7 +72,8 @@ class Config(object):
 
															                  path: str,
														
 
															                  learning_rate: float=None,
														
 
															                  batch_size: int=None,
														
 
															-                 iters: int=None):
														
 
															+                 iters: int=None,
														
 
															+                 opts: list=None):
														
 
															         if not path:
														
 
															             raise ValueError('Please specify the configuration file path.')
														
@@ -84,7 +88,18 @@ class Config(object):
 
															             raise RuntimeError('Config file should in yaml format!')
														
 
															         self.update(
														
 
															-            learning_rate=learning_rate, batch_size=batch_size, iters=iters)
														
 
															+            learning_rate=learning_rate,
														
 
															+            batch_size=batch_size,
														
 
															+            iters=iters,
														
 
															+            opts=opts)
														
 
															+
														
 
															+        model_cfg = self.dic.get('model', None)
														
 
															+        if model_cfg is None:
														
 
															+            raise RuntimeError('No model specified in the configuration file.')
														
 
															+        if (not self.train_dataset_config) and (not self.val_dataset_config):
														
 
															+            raise ValueError(
														
 
															+                'One of `train_dataset` or `val_dataset should be given, but there are none.'
														
 
															+            )
														
 
															     def _update_dic(self, dic, base_dic):
														
 
															         """
														
@@ -121,7 +136,8 @@ class Config(object):
 
															     def update(self,
														
 
															                learning_rate: float=None,
														
 
															                batch_size: int=None,
														
 
															-               iters: int=None):
														
 
															+               iters: int=None,
														
 
															+               opts: list=None):
														
 
															         '''Update config'''
														
 
															         if learning_rate:
														
 
															             if 'lr_scheduler' in self.dic:
														
@@ -135,6 +151,27 @@ class Config(object):
 
															         if iters:
														
 
															             self.dic['iters'] = iters
														
 
															+        # Override config parameters with the command-line `--opts` options.
														
 
															+        if opts is not None:
														
 
															+            if len(opts) % 2 != 0 or len(opts) == 0:
														
 
															+                raise ValueError(
														
 
															+                    "Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
														
 
															+                    format(opts))
														
 
															+            for key, value in zip(opts[0::2], opts[1::2]):
														
 
															+                if isinstance(value, six.string_types):
														
 
															+                    try:
														
 
															+                        value = literal_eval(value)
														
 
															+                    except ValueError:
														
 
															+                        pass
														
 
															+                    except SyntaxError:
														
 
															+                        pass
														
 
															+                key_list = key.split('.')
														
 
															+                dic = self.dic
														
 
															+                for subkey in key_list[:-1]:
														
 
															+                    dic.setdefault(subkey, dict())
														
 
															+                    dic = dic[subkey]
														
 
															+                dic[key_list[-1]] = value
														
 
															+
														
 
															     @property
														
 
															     def batch_size(self) -> int:
														
 
															         return self.dic.get('batch_size', 1)
														
@@ -153,13 +190,32 @@ class Config(object):
 
															                 'No `lr_scheduler` specified in the configuration file.')
														
 
															         params = self.dic.get('lr_scheduler')
														
 
															+        use_warmup = False
														
 
															+        if 'warmup_iters' in params:
														
 
															+            use_warmup = True
														
 
															+            warmup_iters = params.pop('warmup_iters')
														
 
															+            assert 'warmup_start_lr' in params, \
														
 
															+                "When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
														
 
															+            warmup_start_lr = params.pop('warmup_start_lr')
														
 
															+            end_lr = params['learning_rate']
														
 
															+
														
 
															         lr_type = params.pop('type')
														
 
															         if lr_type == 'PolynomialDecay':
														
 
															-            params.setdefault('decay_steps', self.iters)
														
 
															+            iters = self.iters - warmup_iters if use_warmup else self.iters
														
 
															+            iters = max(iters, 1)
														
 
															+            params.setdefault('decay_steps', iters)
														
 
															             params.setdefault('end_lr', 0)
														
 
															             params.setdefault('power', 0.9)
														
 
															+        lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
														
 
															+
														
 
															+        if use_warmup:
														
 
															+            lr_sche = paddle.optimizer.lr.LinearWarmup(
														
 
															+                learning_rate=lr_sche,
														
 
															+                warmup_steps=warmup_iters,
														
 
															+                start_lr=warmup_start_lr,
														
 
															+                end_lr=end_lr)
														
 
															-        return getattr(paddle.optimizer.lr, lr_type)(**params)
														
 
															+        return lr_sche
														
 
															     @property
														
 
															     def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
														
@@ -202,15 +258,33 @@ class Config(object):
 
															         args = self.optimizer_args
														
 
															         optimizer_type = args.pop('type')
														
 
															+        params = self.model.parameters()
														
 
															+        if 'backbone_lr_mult' in args:
														
 
															+            if not hasattr(self.model, 'backbone'):
														
 
															+                logger.warning('The backbone_lr_mult is not effective because'
														
 
															+                               ' the model does not have backbone')
														
 
															+            else:
														
 
															+                backbone_lr_mult = args.pop('backbone_lr_mult')
														
 
															+                backbone_params = self.model.backbone.parameters()
														
 
															+                backbone_params_id = [id(x) for x in backbone_params]
														
 
															+                other_params = [
														
 
															+                    x for x in params if id(x) not in backbone_params_id
														
 
															+                ]
														
 
															+                params = [{
														
 
															+                    'params': backbone_params,
														
 
															+                    'learning_rate': backbone_lr_mult
														
 
															+                }, {
														
 
															+                    'params': other_params
														
 
															+                }]
														
 
															+
														
 
															         if optimizer_type == 'sgd':
														
 
															-            return paddle.optimizer.Momentum(
														
 
															-                lr, parameters=self.model.parameters(), **args)
														
 
															+            return paddle.optimizer.Momentum(lr, parameters=params, **args)
														
 
															         elif optimizer_type == 'adam':
														
 
															-            return paddle.optimizer.Adam(
														
 
															-                lr, parameters=self.model.parameters(), **args)
														
 
															+            return paddle.optimizer.Adam(lr, parameters=params, **args)
														
 
															         elif optimizer_type in paddle.optimizer.__all__:
														
 
															-            return getattr(paddle.optimizer, optimizer_type)(
														
 
															-                lr, parameters=self.model.parameters(), **args)
														
 
															+            return getattr(paddle.optimizer, optimizer_type)(lr,
														
 
															+                                                             parameters=params,
														
 
															+                                                             **args)
														
 
															         raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
														
@@ -295,24 +369,6 @@ class Config(object):
 
															     @property
														
 
															     def model(self) -> paddle.nn.Layer:
														
 
															         model_cfg = self.dic.get('model').copy()
														
 
															-        if not model_cfg:
														
 
															-            raise RuntimeError('No model specified in the configuration file.')
														
 
															-        if not 'num_classes' in model_cfg:
														
 
															-            num_classes = None
														
 
															-            if self.train_dataset_config:
														
 
															-                if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
														
 
															-                    num_classes = self.train_dataset_class.NUM_CLASSES
														
 
															-                elif hasattr(self.train_dataset, 'num_classes'):
														
 
															-                    num_classes = self.train_dataset.num_classes
														
 
															-            elif self.val_dataset_config:
														
 
															-                if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
														
 
															-                    num_classes = self.val_dataset_class.NUM_CLASSES
														
 
															-                elif hasattr(self.val_dataset, 'num_classes'):
														
 
															-                    num_classes = self.val_dataset.num_classes
														
 
															-
														
 
															-            if num_classes is not None:
														
 
															-                model_cfg['num_classes'] = num_classes
														
 
															-
														
 
															         if not self._model:
														
 
															             self._model = self._load_object(model_cfg)
														
 
															         return self._model
														
@@ -401,3 +457,94 @@ class Config(object):
 
															     def __str__(self) -> str:
														
 
															         return yaml.dump(self.dic)
														
 
															+
														
 
															+    @property
														
 
															+    def val_transforms(self) -> list:
														
 
															+        """Get val_transform from val_dataset"""
														
 
															+        _val_dataset = self.val_dataset_config
														
 
															+        if not _val_dataset:
														
 
															+            return []
														
 
															+        _transforms = _val_dataset.get('transforms', [])
														
 
															+        transforms = []
														
 
															+        for i in _transforms:
														
 
															+            transforms.append(self._load_object(i))
														
 
															+        return transforms
														
 
															+
														
 
															+    def check_sync_info(self) -> None:
														
 
															+        """
														
 
															+        Check and sync the info, such as num_classes and img_channels, 
														
 
															+        between the config of model, train_dataset and val_dataset.
														
 
															+        """
														
 
															+        self._check_sync_num_classes()
														
 
															+        self._check_sync_img_channels()
														
 
															+
														
 
															+    def _check_sync_num_classes(self):
														
 
															+        num_classes_set = set()
														
 
															+
														
 
															+        if self.dic['model'].get('num_classes', None) is not None:
														
 
															+            num_classes_set.add(self.dic['model'].get('num_classes'))
														
 
															+        if self.train_dataset_config:
														
 
															+            if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
														
 
															+                num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
														
 
															+            elif 'num_classes' in self.train_dataset_config:
														
 
															+                num_classes_set.add(self.train_dataset_config['num_classes'])
														
 
															+        if self.val_dataset_config:
														
 
															+            if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
														
 
															+                num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
														
 
															+            elif 'num_classes' in self.val_dataset_config:
														
 
															+                num_classes_set.add(self.val_dataset_config['num_classes'])
														
 
															+
														
 
															+        if len(num_classes_set) == 0:
														
 
															+            raise ValueError(
														
 
															+                '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
														
 
															+            )
														
 
															+        elif len(num_classes_set) > 1:
														
 
															+            raise ValueError(
														
 
															+                '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
														
 
															+                .format(num_classes_set))
														
 
															+
														
 
															+        num_classes = num_classes_set.pop()
														
 
															+        self.dic['model']['num_classes'] = num_classes
														
 
															+        if self.train_dataset_config and \
														
 
															+            (not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
														
 
															+            self.dic['train_dataset']['num_classes'] = num_classes
														
 
															+        if self.val_dataset_config and \
														
 
															+            (not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
														
 
															+            self.dic['val_dataset']['num_classes'] = num_classes
														
 
															+
														
 
															+    def _check_sync_img_channels(self):
														
 
															+        img_channels_set = set()
														
 
															+        model_cfg = self.dic['model']
														
 
															+
														
 
															+        # If the model has backbone, in_channels is the input params of backbone.
														
 
															+        # Otherwise, in_channels is the input params of the model.
														
 
															+        if 'backbone' in model_cfg:
														
 
															+            x = model_cfg['backbone'].get('in_channels', None)
														
 
															+            if x is not None:
														
 
															+                img_channels_set.add(x)
														
 
															+        elif model_cfg.get('in_channels', None) is not None:
														
 
															+            img_channels_set.add(model_cfg.get('in_channels'))
														
 
															+        if self.train_dataset_config and \
														
 
															+            ('img_channels' in self.train_dataset_config):
														
 
															+            img_channels_set.add(self.train_dataset_config['img_channels'])
														
 
															+        if self.val_dataset_config and \
														
 
															+            ('img_channels' in self.val_dataset_config):
														
 
															+            img_channels_set.add(self.val_dataset_config['img_channels'])
														
 
															+
														
 
															+        if len(img_channels_set) > 1:
														
 
															+            raise ValueError(
														
 
															+                '`img_channels` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
														
 
															+                .format(img_channels_set))
														
 
															+
														
 
															+        img_channels = 3 if len(img_channels_set) == 0 \
														
 
															+            else img_channels_set.pop()
														
 
															+        if 'backbone' in model_cfg:
														
 
															+            self.dic['model']['backbone']['in_channels'] = img_channels
														
 
															+        else:
														
 
															+            self.dic['model']['in_channels'] = img_channels
														
 
															+        if self.train_dataset_config and \
														
 
															+            self.train_dataset_config['type'] == "Dataset":
														
 
															+            self.dic['train_dataset']['img_channels'] = img_channels
														
 
															+        if self.val_dataset_config and \
														
 
															+            self.val_dataset_config['type'] == "Dataset":
														
 
															+            self.dic['val_dataset']['img_channels'] = img_channels
														
--- a/paddlers/models/ppseg/cvlibs/param_init.py
+++ b/paddlers/models/ppseg/cvlibs/param_init.py
@@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
 
															     initializer = nn.initializer.KaimingUniform(**kwargs)
														
 
															     initializer(param, param.block)
														
 
															+
														
 
															+
														
 
															+def xavier_uniform(param, **kwargs):
														
 
															+    r"""
														
 
															+    This implements the Xavier weight initializer from the paper
														
 
															+    `Understanding the difficulty of training deep feedforward neural
														
 
															+    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
														
 
															+    by Xavier Glorot and Yoshua Bengio.
														
 
															+    This initializer is designed to keep the scale of the gradients
														
 
															+    approximately same in all the layers. In case of Uniform distribution,
														
 
															+    the range is [-x, x], where
														
 
															+    .. math::
														
 
															+        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
														
 
															+    Args:
														
 
															+        param (Tensor): Tensor that needs to be initialized.
														
 
															+
														
 
															+    Examples:
														
 
															+
														
 
															+        from paddlers.models.ppseg.cvlibs import param_init
														
 
															+        import paddle.nn as nn
														
 
															+
														
 
															+        linear = nn.Linear(2, 4)
														
 
															+        param_init.xavier_uniform(linear.weight)
														
 
															+    """
														
 
															+    initializer = nn.initializer.XavierUniform(**kwargs)
														
 
															+    initializer(param, param.block)
														
--- a/paddlers/models/ppseg/datasets/__init__.py
+++ b/paddlers/models/ppseg/datasets/__init__.py
@@ -27,3 +27,4 @@ from .drive import DRIVE
 
															 from .hrf import HRF
														
 
															 from .chase_db1 import CHASEDB1
														
 
															 from .pp_humanseg14k import PPHumanSeg14K
														
 
															+from .pssl import PSSLDataset
														
--- a/paddlers/models/ppseg/datasets/ade.py
+++ b/paddlers/models/ppseg/datasets/ade.py
@@ -89,23 +89,31 @@ class ADE20K(Dataset):
 
															             self.file_list.append([img_path, label_path])
														
 
															     def __getitem__(self, idx):
														
 
															+        data = {}
														
 
															+        data['trans_info'] = []
														
 
															         image_path, label_path = self.file_list[idx]
														
 
															+        data['img'] = image_path
														
 
															+        data['gt_fields'] = [
														
 
															+        ]  # Keys listed in gt_fields have their data[key] transformed synchronously.
														
 
															+
														
 
															         if self.mode == 'val':
														
 
															-            im, _ = self.transforms(im=image_path)
														
 
															+            data = self.transforms(data)
														
 
															             label = np.asarray(Image.open(label_path))
														
 
															             # The class 0 is ignored. And it will equal to 255 after
														
 
															             # subtracted 1, because the dtype of label is uint8.
														
 
															             label = label - 1
														
 
															             label = label[np.newaxis, :, :]
														
 
															-            return im, label
														
 
															+            data['label'] = label
														
 
															+            return data
														
 
															         else:
														
 
															-            im, label = self.transforms(im=image_path, label=label_path)
														
 
															-            label = label - 1
														
 
															+            data['label'] = label_path
														
 
															+            data['gt_fields'].append('label')
														
 
															+            data = self.transforms(data)
														
 
															+            data['label'] = data['label'] - 1
														
 
															             # Recover the ignore pixels adding by transform
														
 
															-            label[label == 254] = 255
														
 
															+            data['label'][data['label'] == 254] = 255
														
 
															             if self.edge:
														
 
															                 edge_mask = F.mask_to_binary_edge(
														
 
															                     label, radius=2, num_classes=self.num_classes)
														
 
															-                return im, label, edge_mask
														
 
															-            else:
														
 
															-                return im, label
														
 
															+                data['edge'] = edge_mask
														
 
															+            return data
														
--- a/paddlers/models/ppseg/datasets/dataset.py
+++ b/paddlers/models/ppseg/datasets/dataset.py
@@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
 
															         Examples:
														
 
															-            import paddlers.models.ppseg.transforms as T
														
 
															+            import paddlers.models.ppseg.transforms as T
														
 
															             from paddlers.models.ppseg.datasets import Dataset
														
 
															-            transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
														
 
															+            transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
														
 
															             dataset_root = 'dataset_root_path'
														
 
															             train_path = 'train_path'
														
 
															             num_classes = 2
														
@@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
 
															     """
														
 
															     def __init__(self,
														
 
															-                 transforms,
														
 
															+                 mode,
														
 
															                  dataset_root,
														
 
															+                 transforms,
														
 
															                  num_classes,
														
 
															-                 mode='train',
														
 
															+                 img_channels=3,
														
 
															                  train_path=None,
														
 
															                  val_path=None,
														
 
															                  test_path=None,
														
@@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
 
															                  ignore_index=255,
														
 
															                  edge=False):
														
 
															         self.dataset_root = dataset_root
														
 
															-        self.transforms = Compose(transforms)
														
 
															+        self.transforms = Compose(transforms, img_channels=img_channels)
														
 
															         self.file_list = list()
														
 
															         self.mode = mode.lower()
														
 
															         self.num_classes = num_classes
														
 
															+        self.img_channels = img_channels
														
 
															         self.ignore_index = ignore_index
														
 
															         self.edge = edge
														
@@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
 
															             raise ValueError(
														
 
															                 "mode should be 'train', 'val' or 'test', but got {}.".format(
														
 
															                     self.mode))
														
 
															-
														
 
															-        if self.transforms is None:
														
 
															-            raise ValueError("`transforms` is necessary, but it is None.")
														
 
															-
														
 
															         if not os.path.exists(self.dataset_root):
														
 
															             raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
														
 
															                 self.dataset_root))
														
 
															+        if self.transforms is None:
														
 
															+            raise ValueError("`transforms` is necessary, but it is None.")
														
 
															+        if num_classes < 1:
														
 
															+            raise ValueError(
														
 
															+                "`num_classes` should be greater than 1, but got {}".format(
														
 
															+                    num_classes))
														
 
															+        if img_channels not in [1, 3]:
														
 
															+            raise ValueError("`img_channels` should in [1, 3], but got {}".
														
 
															+                             format(img_channels))
														
 
															         if self.mode == 'train':
														
 
															             if train_path is None:
														
@@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
 
															                 self.file_list.append([image_path, label_path])
														
 
															     def __getitem__(self, idx):
														
 
															+        data = {}
														
 
															+        data['trans_info'] = []
														
 
															         image_path, label_path = self.file_list[idx]
														
 
															-        if self.mode == 'test':
														
 
															-            im, _ = self.transforms(im=image_path)
														
 
															-            im = im[np.newaxis, ...]
														
 
															-            return im, image_path
														
 
															-        elif self.mode == 'val':
														
 
															-            im, _ = self.transforms(im=image_path)
														
 
															-            label = np.asarray(Image.open(label_path))
														
 
															-            label = label[np.newaxis, :, :]
														
 
															-            return im, label
														
 
															+        data['img'] = image_path
														
 
															+        data['label'] = label_path
														
 
															+        # Keys listed in gt_fields have their data[key] transformed synchronously.
														
 
															+        data['gt_fields'] = []
														
 
															+        if self.mode == 'val':
														
 
															+            data = self.transforms(data)
														
 
															+            data['label'] = data['label'][np.newaxis, :, :]
														
 
															+
														
 
															         else:
														
 
															-            im, label = self.transforms(im=image_path, label=label_path)
														
 
															+            data['gt_fields'].append('label')
														
 
															+            data = self.transforms(data)
														
 
															             if self.edge:
														
 
															                 edge_mask = F.mask_to_binary_edge(
														
 
															-                    label, radius=2, num_classes=self.num_classes)
														
 
															-                return im, label, edge_mask
														
 
															-            else:
														
 
															-                return im, label
														
 
															+                    data['label'], radius=2, num_classes=self.num_classes)
														
 
															+                data['edge'] = edge_mask
														
 
															+        return data
														
 
															     def __len__(self):
														
 
															         return len(self.file_list)
														
--- a/paddlers/models/ppseg/datasets/pssl.py
+++ b/paddlers/models/ppseg/datasets/pssl.py
@@ -0,0 +1,135 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import os
														
 
															+import numpy as np
														
 
															+
														
 
															+from paddlers.models.ppseg.datasets import Dataset
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.transforms import Compose
														
 
															+
														
 
															+
														
 
															+@manager.DATASETS.add_component
														
 
															+class PSSLDataset(Dataset):
														
 
															+    """
														
 
															+    The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
														
 
															+    is computed by the Consensus explanation algorithm.
														
 
															+
														
 
															+    The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation 
														
 
															+    Models" (https://arxiv.org/abs/2207.03335). 
														
 
															+    
														
 
															+    The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification 
														
 
															+    Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).
														
 
															+
														
 
															+    To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
														
 
															+    as follows:
														
 
															+
														
 
															+        imagenet_root
														
 
															+        |
														
 
															+        |--train
														
 
															+        |  |--n01440764
														
 
															+        |  |  |--n01440764_10026.JPEG
														
 
															+        |  |  |--...
														
 
															+        |  |--nxxxxxxxx
														
 
															+        |  |--...
														
 
															+
														
 
															+    where only the "train" set is needed.
														
 
															+
														
 
															+    The PSSL dataset has the folder structure as follows:
														
 
															+
														
 
															+        pssl_root
														
 
															+        |
														
 
															+        |--train
														
 
															+        |  |--n01440764
														
 
															+        |  |  |--n01440764_10026.JPEG_eiseg.npz
														
 
															+        |  |  |--...
														
 
															+        |  |--nxxxxxxxx
														
 
															+        |  |--...
														
 
															+        |
														
 
															+        |--imagenet_lsvrc_2015_synsets.txt
														
 
															+        |--train.txt
														
 
															+
														
 
															+    where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.
														
 
															+
														
 
															+    Args:
														
 
															+        transforms (list): Transforms for image.
														
 
															+        imagenet_root (str): The path to the original ImageNet dataset.
														
 
															+        pssl_root (str): The path to the PSSL dataset.
														
 
															+        mode (str, optional): Which part of dataset to use. Only 'train' is supported. Default: 'train'.
														
 
															+        edge (bool, optional): Whether to compute edge while training. Default: False.
														
 
															+    """
														
 
															+    ignore_index = 1001  # 0~999 is target class, 1000 is bg
														
 
															+    NUM_CLASSES = 1001  # consider target class and bg
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 transforms,
														
 
															+                 imagenet_root,
														
 
															+                 pssl_root,
														
 
															+                 mode='train',
														
 
															+                 edge=False):
														
 
															+        mode = mode.lower()
														
 
															+        if mode not in ['train']:
														
 
															+            raise ValueError("mode should be 'train', but got {}.".format(mode))
														
 
															+        if transforms is None:
														
 
															+            raise ValueError("`transforms` is necessary, but it is None.")
														
 
															+
														
 
															+        self.transforms = Compose(transforms)
														
 
															+        self.mode = mode
														
 
															+        self.edge = edge
														
 
															+
														
 
															+        self.num_classes = self.NUM_CLASSES
														
 
															+        self.ignore_index = self.num_classes  # 1001
														
 
															+        self.file_list = []
														
 
															+        self.class_id_dict = {}
														
 
															+
														
 
															+        if imagenet_root is None or not os.path.isdir(pssl_root):
														
 
															+            raise ValueError(
														
 
															+                "The dataset is not Found or the folder structure is nonconfoumance."
														
 
															+            )
														
 
															+
														
 
															+        train_list_file = os.path.join(pssl_root, "train.txt")
														
 
															+        if not os.path.exists(train_list_file):
														
 
															+            raise ValueError("Train list file isn't exists.")
														
 
															+        for idx, line in enumerate(open(train_list_file)):
														
 
															+            # line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
														
 
															+            label_path = line.strip()
														
 
															+            img_path = label_path.split('.JPEG')[0] + '.JPEG'
														
 
															+            label_path = os.path.join(pssl_root, label_path)
														
 
															+            img_path = os.path.join(imagenet_root, img_path)
														
 
															+            self.file_list.append([img_path, label_path])
														
 
															+
														
 
															+        # mapping class name to class id.
														
 
															+        class_id_file = os.path.join(pssl_root,
														
 
															+                                     "imagenet_lsvrc_2015_synsets.txt")
														
 
															+        if not os.path.exists(class_id_file):
														
 
															+            raise ValueError("Class id file isn't exists.")
														
 
															+        for idx, line in enumerate(open(class_id_file)):
														
 
															+            class_name = line.strip()
														
 
															+            self.class_id_dict[class_name] = idx
														
 
															+
														
 
															+    def __getitem__(self, idx):
														
 
															+        image_path, label_path = self.file_list[idx]
														
 
															+
														
 
															+        # transform label
														
 
															+        class_name = (image_path.split('/')[-1]).split('_')[0]
														
 
															+        class_id = self.class_id_dict[class_name]
														
 
															+
														
 
															+        pssl_seg = np.load(label_path)['arr_0']
														
 
															+        gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
														
 
															+        # [0, 999] for imagenet classes, 1000 for background, others(-1) will be ignored during training.
														
 
															+        gt_semantic_seg[pssl_seg == 1] = class_id
														
 
															+
														
 
															+        im, label = self.transforms(im=image_path, label=gt_semantic_seg)
														
 
															+
														
 
															+        return im, label
														
--- a/paddlers/models/ppseg/models/__init__.py
+++ b/paddlers/models/ppseg/models/__init__.py
@@ -49,9 +49,18 @@ from .segnet import SegNet
 
															 from .encnet import ENCNet
														
 
															 from .hrnet_contrast import HRNetW48Contrast
														
 
															 from .espnet import ESPNetV2
														
 
															+from .pp_liteseg import PPLiteSeg
														
 
															 from .dmnet import DMNet
														
 
															 from .espnetv1 import ESPNetV1
														
 
															 from .enet import ENet
														
 
															 from .bisenetv1 import BiseNetV1
														
 
															 from .fastfcn import FastFCN
														
 
															 from .pfpnnet import PFPNNet
														
 
															+from .glore import GloRe
														
 
															+from .ddrnet import DDRNet_23
														
 
															+from .ccnet import CCNet
														
 
															+from .mobileseg import MobileSeg
														
 
															+from .upernet import UPerNet
														
 
															+from .sinet import SINet
														
 
															+from .lraspp import LRASPP
														
 
															+from .topformer import TopFormer
														
--- a/paddlers/models/ppseg/models/attention_unet.py
+++ b/paddlers/models/ppseg/models/attention_unet.py
@@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
 
															     Args:
														
 
															         num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): The number of channels of the input image. Default: 3.
														
 
															         pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															     """
														
 
															-    def __init__(self, num_classes, pretrained=None):
														
 
															+    def __init__(self, num_classes, in_channels=3, pretrained=None):
														
 
															         super().__init__()
														
 
															-        n_channels = 3
														
 
															-        self.encoder = Encoder(n_channels, [64, 128, 256, 512])
														
 
															+        self.encoder = Encoder(in_channels, [64, 128, 256, 512])
														
 
															         filters = np.array([64, 128, 256, 512, 1024])
														
 
															         self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
														
 
															         self.att5 = AttentionBlock(
														
--- a/paddlers/models/ppseg/models/backbones/__init__.py
+++ b/paddlers/models/ppseg/models/backbones/__init__.py
@@ -21,3 +21,7 @@ from .swin_transformer import *
 
															 from .mobilenetv2 import *
														
 
															 from .mix_transformer import *
														
 
															 from .stdcnet import *
														
 
															+from .lite_hrnet import *
														
 
															+from .shufflenetv2 import *
														
 
															+from .ghostnet import *
														
 
															+from .top_transformer import *
														
--- a/paddlers/models/ppseg/models/backbones/ghostnet.py
+++ b/paddlers/models/ppseg/models/backbones/ghostnet.py
@@ -0,0 +1,318 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
														
 
															+
														
 
															+import math
														
 
															+import paddle
														
 
															+from paddle import ParamAttr
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
														
 
															+from paddle.regularizer import L2Decay
														
 
															+from paddle.nn.initializer import Uniform, KaimingNormal
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.utils import utils, logger
														
 
															+
														
 
															+__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
														
 
															+
														
 
															+
														
 
															+class ConvBNLayer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 kernel_size,
														
 
															+                 stride=1,
														
 
															+                 groups=1,
														
 
															+                 act="relu",
														
 
															+                 name=None):
														
 
															+        super(ConvBNLayer, self).__init__()
														
 
															+        self._conv = Conv2D(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            stride=stride,
														
 
															+            padding=(kernel_size - 1) // 2,
														
 
															+            groups=groups,
														
 
															+            weight_attr=ParamAttr(
														
 
															+                initializer=KaimingNormal(), name=name + "_weights"),
														
 
															+            bias_attr=False)
														
 
															+        bn_name = name + "_bn"
														
 
															+
														
 
															+        self._batch_norm = BatchNorm(
														
 
															+            num_channels=out_channels,
														
 
															+            act=act,
														
 
															+            param_attr=ParamAttr(
														
 
															+                name=bn_name + "_scale", regularizer=L2Decay(0.0)),
														
 
															+            bias_attr=ParamAttr(
														
 
															+                name=bn_name + "_offset", regularizer=L2Decay(0.0)),
														
 
															+            moving_mean_name=bn_name + "_mean",
														
 
															+            moving_variance_name=bn_name + "_variance")
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        y = self._conv(inputs)
														
 
															+        y = self._batch_norm(y)
														
 
															+        return y
														
 
															+
														
 
															+
														
 
															+class SEBlock(nn.Layer):
														
 
															+    def __init__(self, num_channels, reduction_ratio=4, name=None):
														
 
															+        super(SEBlock, self).__init__()
														
 
															+        self.pool2d_gap = AdaptiveAvgPool2D(1)
														
 
															+        self._num_channels = num_channels
														
 
															+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
														
 
															+        med_ch = num_channels // reduction_ratio
														
 
															+        self.squeeze = Linear(
														
 
															+            num_channels,
														
 
															+            med_ch,
														
 
															+            weight_attr=ParamAttr(
														
 
															+                initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
														
 
															+            bias_attr=ParamAttr(name=name + "_1_offset"))
														
 
															+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
														
 
															+        self.excitation = Linear(
														
 
															+            med_ch,
														
 
															+            num_channels,
														
 
															+            weight_attr=ParamAttr(
														
 
															+                initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
														
 
															+            bias_attr=ParamAttr(name=name + "_2_offset"))
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        pool = self.pool2d_gap(inputs)
														
 
															+        pool = paddle.squeeze(pool, axis=[2, 3])
														
 
															+        squeeze = self.squeeze(pool)
														
 
															+        squeeze = F.relu(squeeze)
														
 
															+        excitation = self.excitation(squeeze)
														
 
															+        excitation = paddle.clip(x=excitation, min=0, max=1)
														
 
															+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
														
 
															+        out = paddle.multiply(inputs, excitation)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class GhostModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 output_channels,
														
 
															+                 kernel_size=1,
														
 
															+                 ratio=2,
														
 
															+                 dw_size=3,
														
 
															+                 stride=1,
														
 
															+                 relu=True,
														
 
															+                 name=None):
														
 
															+        super(GhostModule, self).__init__()
														
 
															+        init_channels = int(math.ceil(output_channels / ratio))
														
 
															+        new_channels = int(init_channels * (ratio - 1))
														
 
															+        self.primary_conv = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=init_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            stride=stride,
														
 
															+            groups=1,
														
 
															+            act="relu" if relu else None,
														
 
															+            name=name + "_primary_conv")
														
 
															+        self.cheap_operation = ConvBNLayer(
														
 
															+            in_channels=init_channels,
														
 
															+            out_channels=new_channels,
														
 
															+            kernel_size=dw_size,
														
 
															+            stride=1,
														
 
															+            groups=init_channels,
														
 
															+            act="relu" if relu else None,
														
 
															+            name=name + "_cheap_operation")
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        x = self.primary_conv(inputs)
														
 
															+        y = self.cheap_operation(x)
														
 
															+        out = paddle.concat([x, y], axis=1)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class GhostBottleneck(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 hidden_dim,
														
 
															+                 output_channels,
														
 
															+                 kernel_size,
														
 
															+                 stride,
														
 
															+                 use_se,
														
 
															+                 name=None):
														
 
															+        super(GhostBottleneck, self).__init__()
														
 
															+        self._stride = stride
														
 
															+        self._use_se = use_se
														
 
															+        self._num_channels = in_channels
														
 
															+        self._output_channels = output_channels
														
 
															+        self.ghost_module_1 = GhostModule(
														
 
															+            in_channels=in_channels,
														
 
															+            output_channels=hidden_dim,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            relu=True,
														
 
															+            name=name + "_ghost_module_1")
														
 
															+        if stride == 2:
														
 
															+            self.depthwise_conv = ConvBNLayer(
														
 
															+                in_channels=hidden_dim,
														
 
															+                out_channels=hidden_dim,
														
 
															+                kernel_size=kernel_size,
														
 
															+                stride=stride,
														
 
															+                groups=hidden_dim,
														
 
															+                act=None,
														
 
															+                name=name +
														
 
															+                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
														
 
															+            )
														
 
															+        if use_se:
														
 
															+            self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
														
 
															+        self.ghost_module_2 = GhostModule(
														
 
															+            in_channels=hidden_dim,
														
 
															+            output_channels=output_channels,
														
 
															+            kernel_size=1,
														
 
															+            relu=False,
														
 
															+            name=name + "_ghost_module_2")
														
 
															+        if stride != 1 or in_channels != output_channels:
														
 
															+            self.shortcut_depthwise = ConvBNLayer(
														
 
															+                in_channels=in_channels,
														
 
															+                out_channels=in_channels,
														
 
															+                kernel_size=kernel_size,
														
 
															+                stride=stride,
														
 
															+                groups=in_channels,
														
 
															+                act=None,
														
 
															+                name=name +
														
 
															+                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
														
 
															+            )
														
 
															+            self.shortcut_conv = ConvBNLayer(
														
 
															+                in_channels=in_channels,
														
 
															+                out_channels=output_channels,
														
 
															+                kernel_size=1,
														
 
															+                stride=1,
														
 
															+                groups=1,
														
 
															+                act=None,
														
 
															+                name=name + "_shortcut_conv")
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        x = self.ghost_module_1(inputs)
														
 
															+        if self._stride == 2:
														
 
															+            x = self.depthwise_conv(x)
														
 
															+        if self._use_se:
														
 
															+            x = self.se_block(x)
														
 
															+        x = self.ghost_module_2(x)
														
 
															+        if self._stride == 1 and self._num_channels == self._output_channels:
														
 
															+            shortcut = inputs
														
 
															+        else:
														
 
															+            shortcut = self.shortcut_depthwise(inputs)
														
 
															+            shortcut = self.shortcut_conv(shortcut)
														
 
															+        return paddle.add(x=x, y=shortcut)
														
 
															+
														
 
															+
														
 
															+class GhostNet(nn.Layer):
														
 
															+    def __init__(self, scale, in_channels=3, pretrained=None):
														
 
															+        super(GhostNet, self).__init__()
														
 
															+        self.cfgs = [
														
 
															+            # k, t, c, SE, s
														
 
															+            [3, 16, 16, 0, 1],
														
 
															+            [3, 48, 24, 0, 2],
														
 
															+            [3, 72, 24, 0, 1],  # x4
														
 
															+            [5, 72, 40, 1, 2],
														
 
															+            [5, 120, 40, 1, 1],  # x8
														
 
															+            [3, 240, 80, 0, 2],
														
 
															+            [3, 200, 80, 0, 1],
														
 
															+            [3, 184, 80, 0, 1],
														
 
															+            [3, 184, 80, 0, 1],
														
 
															+            [3, 480, 112, 1, 1],
														
 
															+            [3, 672, 112, 1, 1],  # x16
														
 
															+            [5, 672, 160, 1, 2],
														
 
															+            [5, 960, 160, 0, 1],
														
 
															+            [5, 960, 160, 1, 1],
														
 
															+            [5, 960, 160, 0, 1],
														
 
															+            [5, 960, 160, 1, 1]  # x32
														
 
															+        ]
														
 
															+        self.scale = scale
														
 
															+        self.pretrained = pretrained
														
 
															+
														
 
															+        output_channels = int(self._make_divisible(16 * self.scale, 4))
														
 
															+        self.conv1 = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=output_channels,
														
 
															+            kernel_size=3,
														
 
															+            stride=2,
														
 
															+            groups=1,
														
 
															+            act="relu",
														
 
															+            name="conv1")
														
 
															+
														
 
															+        # build inverted residual blocks
														
 
															+        self.out_index = [2, 4, 10, 15]
														
 
															+        self.feat_channels = []
														
 
															+        self.ghost_bottleneck_list = []
														
 
															+        for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
														
 
															+            in_channels = output_channels
														
 
															+            output_channels = int(self._make_divisible(c * self.scale, 4))
														
 
															+            hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
														
 
															+            ghost_bottleneck = self.add_sublayer(
														
 
															+                name="_ghostbottleneck_" + str(idx),
														
 
															+                sublayer=GhostBottleneck(
														
 
															+                    in_channels=in_channels,
														
 
															+                    hidden_dim=hidden_dim,
														
 
															+                    output_channels=output_channels,
														
 
															+                    kernel_size=k,
														
 
															+                    stride=s,
														
 
															+                    use_se=use_se,
														
 
															+                    name="_ghostbottleneck_" + str(idx)))
														
 
															+            self.ghost_bottleneck_list.append(ghost_bottleneck)
														
 
															+            if idx in self.out_index:
														
 
															+                self.feat_channels.append(output_channels)
														
 
															+
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        feat_list = []
														
 
															+        x = self.conv1(inputs)
														
 
															+        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
														
 
															+            x = ghost_bottleneck(x)
														
 
															+            if idx in self.out_index:
														
 
															+                feat_list.append(x)
														
 
															+        return feat_list
														
 
															+
														
 
															+    def _make_divisible(self, v, divisor, min_value=None):
														
 
															+        """
														
 
															+        This function is taken from the original tf repo.
														
 
															+        It ensures that all layers have a channel number that is divisible by `divisor`.
														
 
															+        It can be seen here:
														
 
															+        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
														
 
															+        """
														
 
															+        if min_value is None:
														
 
															+            min_value = divisor
														
 
															+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
														
 
															+        # Make sure that round down does not go down by more than 10%.
														
 
															+        if new_v < 0.9 * v:
														
 
															+            new_v += divisor
														
 
															+        return new_v
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def GhostNet_x0_5(**kwargs):
														
 
															+    model = GhostNet(scale=0.5, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def GhostNet_x1_0(**kwargs):
														
 
															+    model = GhostNet(scale=1.0, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def GhostNet_x1_3(**kwargs):
														
 
															+    model = GhostNet(scale=1.3, **kwargs)
														
 
															+    return model
														
--- a/paddlers/models/ppseg/models/backbones/hrnet.py
+++ b/paddlers/models/ppseg/models/backbones/hrnet.py
@@ -37,6 +37,7 @@ class HRNet(nn.Layer):
 
															     (https://arxiv.org/pdf/1908.07919.pdf).
														
 
															     Args:
														
 
															+        in_channels (int, optional): The number of channels of the input image. Default: 3.
														
 
															         pretrained (str, optional): The path of pretrained model.
														
 
															         stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
														
 
															         stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
														
@@ -56,6 +57,7 @@ class HRNet(nn.Layer):
 
															     """
														
 
															     def __init__(self,
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None,
														
 
															                  stage1_num_modules=1,
														
 
															                  stage1_num_blocks=(4, ),
														
@@ -91,7 +93,7 @@ class HRNet(nn.Layer):
 
															         self.feat_channels = [sum(stage4_num_channels)]
														
 
															         self.conv_layer1_1 = layers.ConvBNReLU(
														
 
															-            in_channels=3,
														
 
															+            in_channels=in_channels,
														
 
															             out_channels=64,
														
 
															             kernel_size=3,
														
 
															             stride=2,
														
--- a/paddlers/models/ppseg/models/backbones/lite_hrnet.py
+++ b/paddlers/models/ppseg/models/backbones/lite_hrnet.py
@@ -0,0 +1,974 @@
 
															+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+"""
														
 
															+This code is based on
														
 
															+https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
														
 
															+"""
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+from numbers import Integral
														
 
															+from paddle import ParamAttr
														
 
															+from paddle.regularizer import L2Decay
														
 
															+from paddle.nn.initializer import Normal, Constant
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg import utils
														
 
															+
														
 
															+__all__ = [
														
 
															+    "Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
														
 
															+    "Lite_HRNet_wider_naive", "LiteHRNet"
														
 
															+]
														
 
															+
														
 
															+
														
 
															+def Conv2d(in_channels,
														
 
															+           out_channels,
														
 
															+           kernel_size,
														
 
															+           stride=1,
														
 
															+           padding=0,
														
 
															+           dilation=1,
														
 
															+           groups=1,
														
 
															+           bias=True,
														
 
															+           weight_init=Normal(std=0.001),
														
 
															+           bias_init=Constant(0.)):
														
 
															+    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
														
 
															+    if bias:
														
 
															+        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
														
 
															+    else:
														
 
															+        bias_attr = False
														
 
															+    conv = nn.Conv2D(
														
 
															+        in_channels,
														
 
															+        out_channels,
														
 
															+        kernel_size,
														
 
															+        stride,
														
 
															+        padding,
														
 
															+        dilation,
														
 
															+        groups,
														
 
															+        weight_attr=weight_attr,
														
 
															+        bias_attr=bias_attr)
														
 
															+    return conv
														
 
															+
														
 
															+
														
 
															+def channel_shuffle(x, groups):
														
 
															+    x_shape = paddle.shape(x)
														
 
															+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
														
 
															+    num_channels = x.shape[1]
														
 
															+    channels_per_group = num_channels // groups
														
 
															+
														
 
															+    x = paddle.reshape(
														
 
															+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
														
 
															+    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
														
 
															+    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
														
 
															+
														
 
															+    return x
														
 
															+
														
 
															+
														
 
															+class ConvNormLayer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 ch_in,
														
 
															+                 ch_out,
														
 
															+                 filter_size,
														
 
															+                 stride=1,
														
 
															+                 groups=1,
														
 
															+                 norm_type=None,
														
 
															+                 norm_groups=32,
														
 
															+                 norm_decay=0.,
														
 
															+                 freeze_norm=False,
														
 
															+                 act=None):
														
 
															+        super(ConvNormLayer, self).__init__()
														
 
															+        self.act = act
														
 
															+        norm_lr = 0. if freeze_norm else 1.
														
 
															+        if norm_type is not None:
														
 
															+            assert norm_type in ['bn', 'sync_bn', 'gn'], \
														
 
															+                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
														
 
															+            param_attr = ParamAttr(
														
 
															+                initializer=Constant(1.0),
														
 
															+                learning_rate=norm_lr,
														
 
															+                regularizer=L2Decay(norm_decay), )
														
 
															+            bias_attr = ParamAttr(
														
 
															+                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
														
 
															+            global_stats = True if freeze_norm else None
														
 
															+            if norm_type in ['bn', 'sync_bn']:
														
 
															+                self.norm = nn.BatchNorm2D(
														
 
															+                    ch_out,
														
 
															+                    weight_attr=param_attr,
														
 
															+                    bias_attr=bias_attr,
														
 
															+                    use_global_stats=global_stats, )
														
 
															+            elif norm_type == 'gn':
														
 
															+                self.norm = nn.GroupNorm(
														
 
															+                    num_groups=norm_groups,
														
 
															+                    num_channels=ch_out,
														
 
															+                    weight_attr=param_attr,
														
 
															+                    bias_attr=bias_attr)
														
 
															+            norm_params = self.norm.parameters()
														
 
															+            if freeze_norm:
														
 
															+                for param in norm_params:
														
 
															+                    param.stop_gradient = True
														
 
															+            conv_bias_attr = False
														
 
															+        else:
														
 
															+            conv_bias_attr = True
														
 
															+            self.norm = None
														
 
															+
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            in_channels=ch_in,
														
 
															+            out_channels=ch_out,
														
 
															+            kernel_size=filter_size,
														
 
															+            stride=stride,
														
 
															+            padding=(filter_size - 1) // 2,
														
 
															+            groups=groups,
														
 
															+            weight_attr=ParamAttr(initializer=Normal(
														
 
															+                mean=0., std=0.001)),
														
 
															+            bias_attr=conv_bias_attr)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        out = self.conv(inputs)
														
 
															+        if self.norm is not None:
														
 
															+            out = self.norm(out)
														
 
															+
														
 
															+        if self.act == 'relu':
														
 
															+            out = F.relu(out)
														
 
															+        elif self.act == 'sigmoid':
														
 
															+            out = F.sigmoid(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class DepthWiseSeparableConvNormLayer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 ch_in,
														
 
															+                 ch_out,
														
 
															+                 filter_size,
														
 
															+                 stride=1,
														
 
															+                 dw_norm_type=None,
														
 
															+                 pw_norm_type=None,
														
 
															+                 norm_decay=0.,
														
 
															+                 freeze_norm=False,
														
 
															+                 dw_act=None,
														
 
															+                 pw_act=None):
														
 
															+        super(DepthWiseSeparableConvNormLayer, self).__init__()
														
 
															+        self.depthwise_conv = ConvNormLayer(
														
 
															+            ch_in=ch_in,
														
 
															+            ch_out=ch_in,
														
 
															+            filter_size=filter_size,
														
 
															+            stride=stride,
														
 
															+            groups=ch_in,
														
 
															+            norm_type=dw_norm_type,
														
 
															+            act=dw_act,
														
 
															+            norm_decay=norm_decay,
														
 
															+            freeze_norm=freeze_norm, )
														
 
															+        self.pointwise_conv = ConvNormLayer(
														
 
															+            ch_in=ch_in,
														
 
															+            ch_out=ch_out,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            norm_type=pw_norm_type,
														
 
															+            act=pw_act,
														
 
															+            norm_decay=norm_decay,
														
 
															+            freeze_norm=freeze_norm, )
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.depthwise_conv(x)
														
 
															+        x = self.pointwise_conv(x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class CrossResolutionWeightingModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 channels,
														
 
															+                 ratio=16,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(CrossResolutionWeightingModule, self).__init__()
														
 
															+        self.channels = channels
														
 
															+        total_channel = sum(channels)
														
 
															+        self.conv1 = ConvNormLayer(
														
 
															+            ch_in=total_channel,
														
 
															+            ch_out=total_channel // ratio,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            norm_type=norm_type,
														
 
															+            act='relu',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        self.conv2 = ConvNormLayer(
														
 
															+            ch_in=total_channel // ratio,
														
 
															+            ch_out=total_channel,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            norm_type=norm_type,
														
 
															+            act='sigmoid',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        out = []
														
 
															+        for idx, xi in enumerate(x[:-1]):
														
 
															+            kernel_size = stride = pow(2, len(x) - idx - 1)
														
 
															+            xi = F.avg_pool2d(xi, kernel_size=kernel_size, stride=stride)
														
 
															+            out.append(xi)
														
 
															+        out.append(x[-1])
														
 
															+
														
 
															+        out = paddle.concat(out, 1)
														
 
															+        out = self.conv1(out)
														
 
															+        out = self.conv2(out)
														
 
															+        out = paddle.split(out, self.channels, 1)
														
 
															+        out = [
														
 
															+            s * F.interpolate(
														
 
															+                a, paddle.shape(s)[-2:], mode='nearest') for s, a in zip(x, out)
														
 
															+        ]
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class SpatialWeightingModule(nn.Layer):
														
 
															+    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
														
 
															+        super(SpatialWeightingModule, self).__init__()
														
 
															+        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
														
 
															+        self.conv1 = ConvNormLayer(
														
 
															+            ch_in=in_channel,
														
 
															+            ch_out=in_channel // ratio,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            act='relu',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        self.conv2 = ConvNormLayer(
														
 
															+            ch_in=in_channel // ratio,
														
 
															+            ch_out=in_channel,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            act='sigmoid',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        out = self.global_avgpooling(x)
														
 
															+        out = self.conv1(out)
														
 
															+        out = self.conv2(out)
														
 
															+        return x * out
														
 
															+
														
 
															+
														
 
															+class ConditionalChannelWeightingBlock(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 stride,
														
 
															+                 reduce_ratio,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(ConditionalChannelWeightingBlock, self).__init__()
														
 
															+        assert stride in [1, 2]
														
 
															+        branch_channels = [channel // 2 for channel in in_channels]
														
 
															+
														
 
															+        self.cross_resolution_weighting = CrossResolutionWeightingModule(
														
 
															+            branch_channels,
														
 
															+            ratio=reduce_ratio,
														
 
															+            norm_type=norm_type,
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        self.depthwise_convs = nn.LayerList([
														
 
															+            ConvNormLayer(
														
 
															+                channel,
														
 
															+                channel,
														
 
															+                filter_size=3,
														
 
															+                stride=stride,
														
 
															+                groups=channel,
														
 
															+                norm_type=norm_type,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay) for channel in branch_channels
														
 
															+        ])
														
 
															+
														
 
															+        self.spatial_weighting = nn.LayerList([
														
 
															+            SpatialWeightingModule(
														
 
															+                channel,
														
 
															+                ratio=4,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay) for channel in branch_channels
														
 
															+        ])
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = [s.chunk(2, axis=1) for s in x]
														
 
															+        x1 = [s[0] for s in x]
														
 
															+        x2 = [s[1] for s in x]
														
 
															+
														
 
															+        x2 = self.cross_resolution_weighting(x2)
														
 
															+        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
														
 
															+        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
														
 
															+
														
 
															+        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
														
 
															+        out = [channel_shuffle(s, groups=2) for s in out]
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class ShuffleUnit(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channel,
														
 
															+                 out_channel,
														
 
															+                 stride,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(ShuffleUnit, self).__init__()
														
 
															+        branch_channel = out_channel // 2
														
 
															+        self.stride = stride
														
 
															+        if self.stride == 1:
														
 
															+            assert in_channel == branch_channel * 2, \
														
 
															+                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
														
 
															+        if stride > 1:
														
 
															+            self.branch1 = nn.Sequential(
														
 
															+                ConvNormLayer(
														
 
															+                    ch_in=in_channel,
														
 
															+                    ch_out=in_channel,
														
 
															+                    filter_size=3,
														
 
															+                    stride=self.stride,
														
 
															+                    groups=in_channel,
														
 
															+                    norm_type=norm_type,
														
 
															+                    freeze_norm=freeze_norm,
														
 
															+                    norm_decay=norm_decay),
														
 
															+                ConvNormLayer(
														
 
															+                    ch_in=in_channel,
														
 
															+                    ch_out=branch_channel,
														
 
															+                    filter_size=1,
														
 
															+                    stride=1,
														
 
															+                    norm_type=norm_type,
														
 
															+                    act='relu',
														
 
															+                    freeze_norm=freeze_norm,
														
 
															+                    norm_decay=norm_decay), )
														
 
															+        self.branch2 = nn.Sequential(
														
 
															+            ConvNormLayer(
														
 
															+                ch_in=branch_channel if stride == 1 else in_channel,
														
 
															+                ch_out=branch_channel,
														
 
															+                filter_size=1,
														
 
															+                stride=1,
														
 
															+                norm_type=norm_type,
														
 
															+                act='relu',
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay),
														
 
															+            ConvNormLayer(
														
 
															+                ch_in=branch_channel,
														
 
															+                ch_out=branch_channel,
														
 
															+                filter_size=3,
														
 
															+                stride=self.stride,
														
 
															+                groups=branch_channel,
														
 
															+                norm_type=norm_type,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay),
														
 
															+            ConvNormLayer(
														
 
															+                ch_in=branch_channel,
														
 
															+                ch_out=branch_channel,
														
 
															+                filter_size=1,
														
 
															+                stride=1,
														
 
															+                norm_type=norm_type,
														
 
															+                act='relu',
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay), )
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        if self.stride > 1:
														
 
															+            x1 = self.branch1(x)
														
 
															+            x2 = self.branch2(x)
														
 
															+        else:
														
 
															+            x1, x2 = x.chunk(2, axis=1)
														
 
															+            x2 = self.branch2(x2)
														
 
															+        out = paddle.concat([x1, x2], axis=1)
														
 
															+        out = channel_shuffle(out, groups=2)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class IterativeHead(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(IterativeHead, self).__init__()
														
 
															+        num_branches = len(in_channels)
														
 
															+        self.in_channels = in_channels[::-1]
														
 
															+
														
 
															+        projects = []
														
 
															+        for i in range(num_branches):
														
 
															+            if i != num_branches - 1:
														
 
															+                projects.append(
														
 
															+                    DepthWiseSeparableConvNormLayer(
														
 
															+                        ch_in=self.in_channels[i],
														
 
															+                        ch_out=self.in_channels[i + 1],
														
 
															+                        filter_size=3,
														
 
															+                        stride=1,
														
 
															+                        dw_act=None,
														
 
															+                        pw_act='relu',
														
 
															+                        dw_norm_type=norm_type,
														
 
															+                        pw_norm_type=norm_type,
														
 
															+                        freeze_norm=freeze_norm,
														
 
															+                        norm_decay=norm_decay))
														
 
															+            else:
														
 
															+                projects.append(
														
 
															+                    DepthWiseSeparableConvNormLayer(
														
 
															+                        ch_in=self.in_channels[i],
														
 
															+                        ch_out=self.in_channels[i],
														
 
															+                        filter_size=3,
														
 
															+                        stride=1,
														
 
															+                        dw_act=None,
														
 
															+                        pw_act='relu',
														
 
															+                        dw_norm_type=norm_type,
														
 
															+                        pw_norm_type=norm_type,
														
 
															+                        freeze_norm=freeze_norm,
														
 
															+                        norm_decay=norm_decay))
														
 
															+        self.projects = nn.LayerList(projects)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = x[::-1]
														
 
															+        y = []
														
 
															+        last_x = None
														
 
															+        for i, s in enumerate(x):
														
 
															+            if last_x is not None:
														
 
															+                last_x = F.interpolate(
														
 
															+                    last_x,
														
 
															+                    size=paddle.shape(s)[-2:],
														
 
															+                    mode='bilinear',
														
 
															+                    align_corners=True)
														
 
															+                s = s + last_x
														
 
															+            s = self.projects[i](s)
														
 
															+            y.append(s)
														
 
															+            last_x = s
														
 
															+
														
 
															+        return y[::-1]
														
 
															+
														
 
															+
														
 
															+class Stem(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channel,
														
 
															+                 stem_channel,
														
 
															+                 out_channel,
														
 
															+                 expand_ratio,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(Stem, self).__init__()
														
 
															+        self.conv1 = ConvNormLayer(
														
 
															+            in_channel,
														
 
															+            stem_channel,
														
 
															+            filter_size=3,
														
 
															+            stride=2,
														
 
															+            norm_type=norm_type,
														
 
															+            act='relu',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        mid_channel = int(round(stem_channel * expand_ratio))
														
 
															+        branch_channel = stem_channel // 2
														
 
															+        if stem_channel == out_channel:
														
 
															+            inc_channel = out_channel - branch_channel
														
 
															+        else:
														
 
															+            inc_channel = out_channel - stem_channel
														
 
															+        self.branch1 = nn.Sequential(
														
 
															+            ConvNormLayer(
														
 
															+                ch_in=branch_channel,
														
 
															+                ch_out=branch_channel,
														
 
															+                filter_size=3,
														
 
															+                stride=2,
														
 
															+                groups=branch_channel,
														
 
															+                norm_type=norm_type,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay),
														
 
															+            ConvNormLayer(
														
 
															+                ch_in=branch_channel,
														
 
															+                ch_out=inc_channel,
														
 
															+                filter_size=1,
														
 
															+                stride=1,
														
 
															+                norm_type=norm_type,
														
 
															+                act='relu',
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay), )
														
 
															+        self.expand_conv = ConvNormLayer(
														
 
															+            ch_in=branch_channel,
														
 
															+            ch_out=mid_channel,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            norm_type=norm_type,
														
 
															+            act='relu',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        self.depthwise_conv = ConvNormLayer(
														
 
															+            ch_in=mid_channel,
														
 
															+            ch_out=mid_channel,
														
 
															+            filter_size=3,
														
 
															+            stride=2,
														
 
															+            groups=mid_channel,
														
 
															+            norm_type=norm_type,
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+        self.linear_conv = ConvNormLayer(
														
 
															+            ch_in=mid_channel,
														
 
															+            ch_out=branch_channel
														
 
															+            if stem_channel == out_channel else stem_channel,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            norm_type=norm_type,
														
 
															+            act='relu',
														
 
															+            freeze_norm=freeze_norm,
														
 
															+            norm_decay=norm_decay)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.conv1(x)
														
 
															+        x1, x2 = x.chunk(2, axis=1)
														
 
															+        x1 = self.branch1(x1)
														
 
															+        x2 = self.expand_conv(x2)
														
 
															+        x2 = self.depthwise_conv(x2)
														
 
															+        x2 = self.linear_conv(x2)
														
 
															+        out = paddle.concat([x1, x2], axis=1)
														
 
															+        out = channel_shuffle(out, groups=2)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class LiteHRNetModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_branches,
														
 
															+                 num_blocks,
														
 
															+                 in_channels,
														
 
															+                 reduce_ratio,
														
 
															+                 module_type,
														
 
															+                 multiscale_output=False,
														
 
															+                 with_fuse=True,
														
 
															+                 norm_type='bn',
														
 
															+                 freeze_norm=False,
														
 
															+                 norm_decay=0.):
														
 
															+        super(LiteHRNetModule, self).__init__()
														
 
															+        assert num_branches == len(in_channels),\
														
 
															+            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
														
 
															+        assert module_type in [
														
 
															+            'LITE', 'NAIVE'
														
 
															+        ], "module_type should be one of ['LITE', 'NAIVE']"
														
 
															+        self.num_branches = num_branches
														
 
															+        self.in_channels = in_channels
														
 
															+        self.multiscale_output = multiscale_output
														
 
															+        self.with_fuse = with_fuse
														
 
															+        self.norm_type = 'bn'
														
 
															+        self.module_type = module_type
														
 
															+
														
 
															+        if self.module_type == 'LITE':
														
 
															+            self.layers = self._make_weighting_blocks(
														
 
															+                num_blocks,
														
 
															+                reduce_ratio,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay)
														
 
															+        elif self.module_type == 'NAIVE':
														
 
															+            self.layers = self._make_naive_branches(
														
 
															+                num_branches,
														
 
															+                num_blocks,
														
 
															+                freeze_norm=freeze_norm,
														
 
															+                norm_decay=norm_decay)
														
 
															+
														
 
															+        if self.with_fuse:
														
 
															+            self.fuse_layers = self._make_fuse_layers(
														
 
															+                freeze_norm=freeze_norm, norm_decay=norm_decay)
														
 
															+            self.relu = nn.ReLU()
														
 
															+
														
 
															+    def _make_weighting_blocks(self,
														
 
															+                               num_blocks,
														
 
															+                               reduce_ratio,
														
 
															+                               stride=1,
														
 
															+                               freeze_norm=False,
														
 
															+                               norm_decay=0.):
														
 
															+        layers = []
														
 
															+        for i in range(num_blocks):
														
 
															+            layers.append(
														
 
															+                ConditionalChannelWeightingBlock(
														
 
															+                    self.in_channels,
														
 
															+                    stride=stride,
														
 
															+                    reduce_ratio=reduce_ratio,
														
 
															+                    norm_type=self.norm_type,
														
 
															+                    freeze_norm=freeze_norm,
														
 
															+                    norm_decay=norm_decay))
														
 
															+        return nn.Sequential(*layers)
														
 
															+
														
 
															+    def _make_naive_branches(self,
														
 
															+                             num_branches,
														
 
															+                             num_blocks,
														
 
															+                             freeze_norm=False,
														
 
															+                             norm_decay=0.):
														
 
															+        branches = []
														
 
															+        for branch_idx in range(num_branches):
														
 
															+            layers = []
														
 
															+            for i in range(num_blocks):
														
 
															+                layers.append(
														
 
															+                    ShuffleUnit(
														
 
															+                        self.in_channels[branch_idx],
														
 
															+                        self.in_channels[branch_idx],
														
 
															+                        stride=1,
														
 
															+                        norm_type=self.norm_type,
														
 
															+                        freeze_norm=freeze_norm,
														
 
															+                        norm_decay=norm_decay))
														
 
															+            branches.append(nn.Sequential(*layers))
														
 
															+        return nn.LayerList(branches)
														
 
															+
														
 
															+    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
														
 
															+        if self.num_branches == 1:
														
 
															+            return None
														
 
															+        fuse_layers = []
														
 
															+        num_out_branches = self.num_branches if self.multiscale_output else 1
														
 
															+        for i in range(num_out_branches):
														
 
															+            fuse_layer = []
														
 
															+            for j in range(self.num_branches):
														
 
															+                if j > i:
														
 
															+                    fuse_layer.append(
														
 
															+                        nn.Sequential(
														
 
															+                            Conv2d(
														
 
															+                                self.in_channels[j],
														
 
															+                                self.in_channels[i],
														
 
															+                                kernel_size=1,
														
 
															+                                stride=1,
														
 
															+                                padding=0,
														
 
															+                                bias=False, ),
														
 
															+                            nn.BatchNorm2D(self.in_channels[i]),
														
 
															+                            nn.Upsample(
														
 
															+                                scale_factor=2**(j - i), mode='nearest')))
														
 
															+                elif j == i:
														
 
															+                    fuse_layer.append(None)
														
 
															+                else:
														
 
															+                    conv_downsamples = []
														
 
															+                    for k in range(i - j):
														
 
															+                        if k == i - j - 1:
														
 
															+                            conv_downsamples.append(
														
 
															+                                nn.Sequential(
														
 
															+                                    Conv2d(
														
 
															+                                        self.in_channels[j],
														
 
															+                                        self.in_channels[j],
														
 
															+                                        kernel_size=3,
														
 
															+                                        stride=2,
														
 
															+                                        padding=1,
														
 
															+                                        groups=self.in_channels[j],
														
 
															+                                        bias=False, ),
														
 
															+                                    nn.BatchNorm2D(self.in_channels[j]),
														
 
															+                                    Conv2d(
														
 
															+                                        self.in_channels[j],
														
 
															+                                        self.in_channels[i],
														
 
															+                                        kernel_size=1,
														
 
															+                                        stride=1,
														
 
															+                                        padding=0,
														
 
															+                                        bias=False, ),
														
 
															+                                    nn.BatchNorm2D(self.in_channels[i])))
														
 
															+                        else:
														
 
															+                            conv_downsamples.append(
														
 
															+                                nn.Sequential(
														
 
															+                                    Conv2d(
														
 
															+                                        self.in_channels[j],
														
 
															+                                        self.in_channels[j],
														
 
															+                                        kernel_size=3,
														
 
															+                                        stride=2,
														
 
															+                                        padding=1,
														
 
															+                                        groups=self.in_channels[j],
														
 
															+                                        bias=False, ),
														
 
															+                                    nn.BatchNorm2D(self.in_channels[j]),
														
 
															+                                    Conv2d(
														
 
															+                                        self.in_channels[j],
														
 
															+                                        self.in_channels[j],
														
 
															+                                        kernel_size=1,
														
 
															+                                        stride=1,
														
 
															+                                        padding=0,
														
 
															+                                        bias=False, ),
														
 
															+                                    nn.BatchNorm2D(self.in_channels[j]),
														
 
															+                                    nn.ReLU()))
														
 
															+
														
 
															+                    fuse_layer.append(nn.Sequential(*conv_downsamples))
														
 
															+            fuse_layers.append(nn.LayerList(fuse_layer))
														
 
															+
														
 
															+        return nn.LayerList(fuse_layers)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        if self.num_branches == 1:
														
 
															+            return [self.layers[0](x[0])]
														
 
															+        if self.module_type == 'LITE':
														
 
															+            out = self.layers(x)
														
 
															+        elif self.module_type == 'NAIVE':
														
 
															+            for i in range(self.num_branches):
														
 
															+                x[i] = self.layers[i](x[i])
														
 
															+            out = x
														
 
															+        if self.with_fuse:
														
 
															+            out_fuse = []
														
 
															+            for i in range(len(self.fuse_layers)):
														
 
															+                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
														
 
															+                for j in range(self.num_branches):
														
 
															+                    if j == 0:
														
 
															+                        y += y
														
 
															+                    elif i == j:
														
 
															+                        y += out[j]
														
 
															+                    else:
														
 
															+                        y += self.fuse_layers[i][j](out[j])
														
 
															+                    if i == 0:
														
 
															+                        out[i] = y
														
 
															+                out_fuse.append(self.relu(y))
														
 
															+            out = out_fuse
														
 
															+        elif not self.multiscale_output:
														
 
															+            out = [out[0]]
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class LiteHRNet(nn.Layer):
														
 
															+    """
														
 
															+    @inproceedings{Yulitehrnet21,
														
 
															+    title={Lite-HRNet: A Lightweight High-Resolution Network},
														
 
															+        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
														
 
															+        booktitle={CVPR},year={2021}
														
 
															+    }
														
 
															+
														
 
															+    Args:
														
 
															+        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
														
 
															+            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
														
 
															+            "wider_naive": Naive network with wider channels in each block.
														
 
															+            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
														
 
															+            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															+        freeze_at (int): the stage to freeze
														
 
															+        freeze_norm (bool): whether to freeze norm in HRNet
														
 
															+        norm_decay (float): weight decay for normalization layer weights
														
 
															+        return_idx (List): the stage to return
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 network_type,
														
 
															+                 in_channels=3,
														
 
															+                 freeze_at=0,
														
 
															+                 freeze_norm=True,
														
 
															+                 norm_decay=0.,
														
 
															+                 return_idx=[0, 1, 2, 3],
														
 
															+                 use_head=False,
														
 
															+                 pretrained=None):
														
 
															+        super(LiteHRNet, self).__init__()
														
 
															+        if isinstance(return_idx, Integral):
														
 
															+            return_idx = [return_idx]
														
 
															+        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
														
 
															+            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
														
 
															+        assert len(return_idx) > 0, "need one or more return index"
														
 
															+        self.freeze_at = freeze_at
														
 
															+        self.freeze_norm = freeze_norm
														
 
															+        self.norm_decay = norm_decay
														
 
															+        self.return_idx = return_idx
														
 
															+        self.norm_type = 'bn'
														
 
															+        self.use_head = use_head
														
 
															+        self.pretrained = pretrained
														
 
															+
														
 
															+        self.module_configs = {
														
 
															+            "lite_18": {
														
 
															+                "num_modules": [2, 4, 2],
														
 
															+                "num_branches": [2, 3, 4],
														
 
															+                "num_blocks": [2, 2, 2],
														
 
															+                "module_type": ["LITE", "LITE", "LITE"],
														
 
															+                "reduce_ratios": [8, 8, 8],
														
 
															+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
														
 
															+            },
														
 
															+            "lite_30": {
														
 
															+                "num_modules": [3, 8, 3],
														
 
															+                "num_branches": [2, 3, 4],
														
 
															+                "num_blocks": [2, 2, 2],
														
 
															+                "module_type": ["LITE", "LITE", "LITE"],
														
 
															+                "reduce_ratios": [8, 8, 8],
														
 
															+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
														
 
															+            },
														
 
															+            "naive": {
														
 
															+                "num_modules": [2, 4, 2],
														
 
															+                "num_branches": [2, 3, 4],
														
 
															+                "num_blocks": [2, 2, 2],
														
 
															+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
														
 
															+                "reduce_ratios": [1, 1, 1],
														
 
															+                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
														
 
															+            },
														
 
															+            "wider_naive": {
														
 
															+                "num_modules": [2, 4, 2],
														
 
															+                "num_branches": [2, 3, 4],
														
 
															+                "num_blocks": [2, 2, 2],
														
 
															+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
														
 
															+                "reduce_ratios": [1, 1, 1],
														
 
															+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
														
 
															+            },
														
 
															+        }
														
 
															+
														
 
															+        self.stages_config = self.module_configs[network_type]
														
 
															+
														
 
															+        self.stem = Stem(in_channels, 32, 32, 1)
														
 
															+        num_channels_pre_layer = [32]
														
 
															+        for stage_idx in range(3):
														
 
															+            num_channels = self.stages_config["num_channels"][stage_idx]
														
 
															+            setattr(self, 'transition{}'.format(stage_idx),
														
 
															+                    self._make_transition_layer(num_channels_pre_layer,
														
 
															+                                                num_channels, self.freeze_norm,
														
 
															+                                                self.norm_decay))
														
 
															+            stage, num_channels_pre_layer = self._make_stage(
														
 
															+                self.stages_config, stage_idx, num_channels, True,
														
 
															+                self.freeze_norm, self.norm_decay)
														
 
															+            setattr(self, 'stage{}'.format(stage_idx), stage)
														
 
															+
														
 
															+        num_channels = self.stages_config["num_channels"][-1]
														
 
															+        self.feat_channels = num_channels
														
 
															+
														
 
															+        if self.use_head:
														
 
															+            self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
														
 
															+                                            self.freeze_norm, self.norm_decay)
														
 
															+
														
 
															+            self.feat_channels = [num_channels[0]]
														
 
															+            for i in range(1, len(num_channels)):
														
 
															+                self.feat_channels.append(num_channels[i] // 2)
														
 
															+
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def _make_transition_layer(self,
														
 
															+                               num_channels_pre_layer,
														
 
															+                               num_channels_cur_layer,
														
 
															+                               freeze_norm=False,
														
 
															+                               norm_decay=0.):
														
 
															+        num_branches_pre = len(num_channels_pre_layer)
														
 
															+        num_branches_cur = len(num_channels_cur_layer)
														
 
															+        transition_layers = []
														
 
															+        for i in range(num_branches_cur):
														
 
															+            if i < num_branches_pre:
														
 
															+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
														
 
															+                    transition_layers.append(
														
 
															+                        nn.Sequential(
														
 
															+                            Conv2d(
														
 
															+                                num_channels_pre_layer[i],
														
 
															+                                num_channels_pre_layer[i],
														
 
															+                                kernel_size=3,
														
 
															+                                stride=1,
														
 
															+                                padding=1,
														
 
															+                                groups=num_channels_pre_layer[i],
														
 
															+                                bias=False),
														
 
															+                            nn.BatchNorm2D(num_channels_pre_layer[i]),
														
 
															+                            Conv2d(
														
 
															+                                num_channels_pre_layer[i],
														
 
															+                                num_channels_cur_layer[i],
														
 
															+                                kernel_size=1,
														
 
															+                                stride=1,
														
 
															+                                padding=0,
														
 
															+                                bias=False, ),
														
 
															+                            nn.BatchNorm2D(num_channels_cur_layer[i]),
														
 
															+                            nn.ReLU()))
														
 
															+                else:
														
 
															+                    transition_layers.append(None)
														
 
															+            else:
														
 
															+                conv_downsamples = []
														
 
															+                for j in range(i + 1 - num_branches_pre):
														
 
															+                    conv_downsamples.append(
														
 
															+                        nn.Sequential(
														
 
															+                            Conv2d(
														
 
															+                                num_channels_pre_layer[-1],
														
 
															+                                num_channels_pre_layer[-1],
														
 
															+                                groups=num_channels_pre_layer[-1],
														
 
															+                                kernel_size=3,
														
 
															+                                stride=2,
														
 
															+                                padding=1,
														
 
															+                                bias=False, ),
														
 
															+                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
														
 
															+                            Conv2d(
														
 
															+                                num_channels_pre_layer[-1],
														
 
															+                                num_channels_cur_layer[i]
														
 
															+                                if j == i - num_branches_pre else
														
 
															+                                num_channels_pre_layer[-1],
														
 
															+                                kernel_size=1,
														
 
															+                                stride=1,
														
 
															+                                padding=0,
														
 
															+                                bias=False, ),
														
 
															+                            nn.BatchNorm2D(num_channels_cur_layer[i]
														
 
															+                                           if j == i - num_branches_pre else
														
 
															+                                           num_channels_pre_layer[-1]),
														
 
															+                            nn.ReLU()))
														
 
															+                transition_layers.append(nn.Sequential(*conv_downsamples))
														
 
															+        return nn.LayerList(transition_layers)
														
 
															+
														
 
															+    def _make_stage(self,
														
 
															+                    stages_config,
														
 
															+                    stage_idx,
														
 
															+                    in_channels,
														
 
															+                    multiscale_output,
														
 
															+                    freeze_norm=False,
														
 
															+                    norm_decay=0.):
														
 
															+        num_modules = stages_config["num_modules"][stage_idx]
														
 
															+        num_branches = stages_config["num_branches"][stage_idx]
														
 
															+        num_blocks = stages_config["num_blocks"][stage_idx]
														
 
															+        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
														
 
															+        module_type = stages_config['module_type'][stage_idx]
														
 
															+
														
 
															+        modules = []
														
 
															+        for i in range(num_modules):
														
 
															+            if not multiscale_output and i == num_modules - 1:
														
 
															+                reset_multiscale_output = False
														
 
															+            else:
														
 
															+                reset_multiscale_output = True
														
 
															+            modules.append(
														
 
															+                LiteHRNetModule(
														
 
															+                    num_branches,
														
 
															+                    num_blocks,
														
 
															+                    in_channels,
														
 
															+                    reduce_ratio,
														
 
															+                    module_type,
														
 
															+                    multiscale_output=reset_multiscale_output,
														
 
															+                    with_fuse=True,
														
 
															+                    freeze_norm=freeze_norm,
														
 
															+                    norm_decay=norm_decay))
														
 
															+            in_channels = modules[-1].in_channels
														
 
															+        return nn.Sequential(*modules), in_channels
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.stem(x)
														
 
															+
														
 
															+        y_list = [x]
														
 
															+        for stage_idx in range(3):
														
 
															+            x_list = []
														
 
															+            transition = getattr(self, 'transition{}'.format(stage_idx))
														
 
															+            for j in range(self.stages_config["num_branches"][stage_idx]):
														
 
															+                if transition[j] is not None:
														
 
															+                    if j >= len(y_list):
														
 
															+                        x_list.append(transition[j](y_list[-1]))
														
 
															+                    else:
														
 
															+                        x_list.append(transition[j](y_list[j]))
														
 
															+                else:
														
 
															+                    x_list.append(y_list[j])
														
 
															+            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
														
 
															+
														
 
															+        if self.use_head:
														
 
															+            y_list = self.head_layer(y_list)
														
 
															+
														
 
															+        res = []
														
 
															+        for i, layer in enumerate(y_list):
														
 
															+            if i == self.freeze_at:
														
 
															+                layer.stop_gradient = True
														
 
															+            if i in self.return_idx:
														
 
															+                res.append(layer)
														
 
															+        return res
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def Lite_HRNet_18(**kwargs):
														
 
															+    model = LiteHRNet(network_type="lite_18", **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def Lite_HRNet_30(**kwargs):
														
 
															+    model = LiteHRNet(network_type="lite_30", **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def Lite_HRNet_naive(**kwargs):
														
 
															+    model = LiteHRNet(network_type="naive", **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def Lite_HRNet_wider_naive(**kwargs):
														
 
															+    model = LiteHRNet(network_type="wider_naive", **kwargs)
														
 
															+    return model
														
--- a/paddlers/models/ppseg/models/backbones/mix_transformer.py
+++ b/paddlers/models/ppseg/models/backbones/mix_transformer.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
 
															     def __init__(self,
														
 
															                  img_size=224,
														
 
															                  patch_size=16,
														
 
															-                 in_chans=3,
														
 
															+                 in_channels=3,
														
 
															                  num_classes=1000,
														
 
															                  embed_dims=[64, 128, 256, 512],
														
 
															                  num_heads=[1, 2, 4, 8],
														
@@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
 
															             img_size=img_size,
														
 
															             patch_size=7,
														
 
															             stride=4,
														
 
															-            in_chans=in_chans,
														
 
															+            in_chans=in_channels,
														
 
															             embed_dim=embed_dims[0])
														
 
															         self.patch_embed2 = OverlapPatchEmbed(
														
 
															             img_size=img_size // 4,
														
--- a/paddlers/models/ppseg/models/backbones/mobilenetv2.py
+++ b/paddlers/models/ppseg/models/backbones/mobilenetv2.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -12,13 +12,26 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															+import paddle
														
 
															+from paddle import ParamAttr
														
 
															 import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
														
 
															+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
														
 
															 from paddlers.models.ppseg.cvlibs import manager
														
 
															 from paddlers.models.ppseg import utils
														
 
															+__all__ = [
														
 
															+    "MobileNetV2_x0_25",
														
 
															+    "MobileNetV2_x0_5",
														
 
															+    "MobileNetV2_x0_75",
														
 
															+    "MobileNetV2_x1_0",
														
 
															+    "MobileNetV2_x1_5",
														
 
															+    "MobileNetV2_x2_0",
														
 
															+]
														
 
															+
														
 
															-@manager.BACKBONES.add_component
														
 
															 class MobileNetV2(nn.Layer):
														
 
															     """
														
 
															         The MobileNetV2 implementation based on PaddlePaddle.
														
@@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
 
															         (https://arxiv.org/abs/1801.04381).
														
 
															         Args:
														
 
															-            channel_ratio (float, optional): The ratio of channel. Default: 1.0
														
 
															-            min_channel (int, optional): The minimum of channel. Default: 16
														
 
															+            scale (float, optional): The scale of channel. Default: 1.0
														
 
															+            in_channels (int, optional): The channels of input image. Default: 3.
														
 
															             pretrained (str, optional): The path or url of pretrained model. Default: None
														
 
															         """
														
 
															-    def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
														
 
															-        super(MobileNetV2, self).__init__()
														
 
															-        self.channel_ratio = channel_ratio
														
 
															-        self.min_channel = min_channel
														
 
															+    def __init__(self, scale=1.0, in_channels=3, pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.scale = scale
														
 
															         self.pretrained = pretrained
														
 
															+        prefix_name = ""
														
 
															-        self.stage0 = conv_bn(3, self.depth(32), 3, 2)
														
 
															-
														
 
															-        self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
														
 
															-
														
 
															-        self.stage2 = nn.Sequential(
														
 
															-            InvertedResidual(self.depth(16), self.depth(24), 2, 6),
														
 
															-            InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
														
 
															-
														
 
															-        self.stage3 = nn.Sequential(
														
 
															-            InvertedResidual(self.depth(24), self.depth(32), 2, 6),
														
 
															-            InvertedResidual(self.depth(32), self.depth(32), 1, 6),
														
 
															-            InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
														
 
															+        bottleneck_params_list = [
														
 
															+            (1, 16, 1, 1),
														
 
															+            (6, 24, 2, 2),  # x4
														
 
															+            (6, 32, 3, 2),  # x8
														
 
															+            (6, 64, 4, 2),
														
 
															+            (6, 96, 3, 1),  # x16
														
 
															+            (6, 160, 3, 2),
														
 
															+            (6, 320, 1, 1),  # x32
														
 
															+        ]
														
 
															+        self.out_index = [1, 2, 4, 6]
														
 
															-        self.stage4 = nn.Sequential(
														
 
															-            InvertedResidual(self.depth(32), self.depth(64), 2, 6),
														
 
															-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
														
 
															-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
														
 
															-            InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
														
 
															+        self.conv1 = ConvBNLayer(
														
 
															+            num_channels=in_channels,
														
 
															+            num_filters=int(32 * scale),
														
 
															+            filter_size=3,
														
 
															+            stride=2,
														
 
															+            padding=1,
														
 
															+            name=prefix_name + "conv1_1")
														
 
															-        self.stage5 = nn.Sequential(
														
 
															-            InvertedResidual(self.depth(64), self.depth(96), 1, 6),
														
 
															-            InvertedResidual(self.depth(96), self.depth(96), 1, 6),
														
 
															-            InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
														
 
															+        self.block_list = []
														
 
															+        i = 1
														
 
															+        in_c = int(32 * scale)
														
 
															+        for layer_setting in bottleneck_params_list:
														
 
															+            t, c, n, s = layer_setting
														
 
															+            i += 1
														
 
															+            block = self.add_sublayer(
														
 
															+                prefix_name + "conv" + str(i),
														
 
															+                sublayer=InvresiBlocks(
														
 
															+                    in_c=in_c,
														
 
															+                    t=t,
														
 
															+                    c=int(c * scale),
														
 
															+                    n=n,
														
 
															+                    s=s,
														
 
															+                    name=prefix_name + "conv" + str(i)))
														
 
															+            self.block_list.append(block)
														
 
															+            in_c = int(c * scale)
														
 
															-        self.stage6 = nn.Sequential(
														
 
															-            InvertedResidual(self.depth(96), self.depth(160), 2, 6),
														
 
															-            InvertedResidual(self.depth(160), self.depth(160), 1, 6),
														
 
															-            InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
														
 
															-
														
 
															-        self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
														
 
															+        out_channels = [
														
 
															+            bottleneck_params_list[idx][1] for idx in self.out_index
														
 
															+        ]
														
 
															+        self.feat_channels = [int(c * scale) for c in out_channels]
														
 
															         self.init_weight()
														
 
															-    def depth(self, channels):
														
 
															-        min_channel = min(channels, self.min_channel)
														
 
															-        return max(min_channel, int(channels * self.channel_ratio))
														
 
															-
														
 
															-    def forward(self, x):
														
 
															+    def forward(self, inputs):
														
 
															         feat_list = []
														
 
															-        feature_1_2 = self.stage0(x)
														
 
															-        feature_1_2 = self.stage1(feature_1_2)
														
 
															-        feature_1_4 = self.stage2(feature_1_2)
														
 
															-        feature_1_8 = self.stage3(feature_1_4)
														
 
															-        feature_1_16 = self.stage4(feature_1_8)
														
 
															-        feature_1_16 = self.stage5(feature_1_16)
														
 
															-        feature_1_32 = self.stage6(feature_1_16)
														
 
															-        feature_1_32 = self.stage7(feature_1_32)
														
 
															-        feat_list.append(feature_1_4)
														
 
															-        feat_list.append(feature_1_8)
														
 
															-        feat_list.append(feature_1_16)
														
 
															-        feat_list.append(feature_1_32)
														
 
															+        y = self.conv1(inputs, if_act=True)
														
 
															+        for idx, block in enumerate(self.block_list):
														
 
															+            y = block(y)
														
 
															+            if idx in self.out_index:
														
 
															+                feat_list.append(y)
														
 
															+
														
 
															         return feat_list
														
 
															     def init_weight(self):
														
@@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
 
															             utils.load_entire_model(self, self.pretrained)
														
 
															-def conv_bn(inp, oup, kernel, stride):
														
 
															-    return nn.Sequential(
														
 
															-        nn.Conv2D(
														
 
															-            in_channels=inp,
														
 
															-            out_channels=oup,
														
 
															-            kernel_size=kernel,
														
 
															+class ConvBNLayer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_channels,
														
 
															+                 filter_size,
														
 
															+                 num_filters,
														
 
															+                 stride,
														
 
															+                 padding,
														
 
															+                 channels=None,
														
 
															+                 num_groups=1,
														
 
															+                 name=None,
														
 
															+                 use_cudnn=True):
														
 
															+        super(ConvBNLayer, self).__init__()
														
 
															+
														
 
															+        self._conv = Conv2D(
														
 
															+            in_channels=num_channels,
														
 
															+            out_channels=num_filters,
														
 
															+            kernel_size=filter_size,
														
 
															+            stride=stride,
														
 
															+            padding=padding,
														
 
															+            groups=num_groups,
														
 
															+            weight_attr=ParamAttr(name=name + "_weights"),
														
 
															+            bias_attr=False)
														
 
															+
														
 
															+        self._batch_norm = BatchNorm(
														
 
															+            num_filters,
														
 
															+            param_attr=ParamAttr(name=name + "_bn_scale"),
														
 
															+            bias_attr=ParamAttr(name=name + "_bn_offset"),
														
 
															+            moving_mean_name=name + "_bn_mean",
														
 
															+            moving_variance_name=name + "_bn_variance")
														
 
															+
														
 
															+    def forward(self, inputs, if_act=True):
														
 
															+        y = self._conv(inputs)
														
 
															+        y = self._batch_norm(y)
														
 
															+        if if_act:
														
 
															+            y = F.relu6(y)
														
 
															+        return y
														
 
															+
														
 
															+
														
 
															+class InvertedResidualUnit(nn.Layer):
														
 
															+    def __init__(self, num_channels, num_in_filter, num_filters, stride,
														
 
															+                 filter_size, padding, expansion_factor, name):
														
 
															+        super(InvertedResidualUnit, self).__init__()
														
 
															+        num_expfilter = int(round(num_in_filter * expansion_factor))
														
 
															+        self._expand_conv = ConvBNLayer(
														
 
															+            num_channels=num_channels,
														
 
															+            num_filters=num_expfilter,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            num_groups=1,
														
 
															+            name=name + "_expand")
														
 
															+
														
 
															+        self._bottleneck_conv = ConvBNLayer(
														
 
															+            num_channels=num_expfilter,
														
 
															+            num_filters=num_expfilter,
														
 
															+            filter_size=filter_size,
														
 
															             stride=stride,
														
 
															-            padding=(kernel - 1) // 2,
														
 
															-            bias_attr=False),
														
 
															-        nn.BatchNorm2D(
														
 
															-            num_features=oup, epsilon=1e-05, momentum=0.1),
														
 
															-        nn.ReLU())
														
 
															-
														
 
															-
														
 
															-class InvertedResidual(nn.Layer):
														
 
															-    def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
														
 
															-        super(InvertedResidual, self).__init__()
														
 
															-        self.stride = stride
														
 
															-        assert stride in [1, 2]
														
 
															-        self.use_res_connect = self.stride == 1 and inp == oup
														
 
															-
														
 
															-        self.conv = nn.Sequential(
														
 
															-            nn.Conv2D(
														
 
															-                inp,
														
 
															-                inp * expand_ratio,
														
 
															-                kernel_size=1,
														
 
															-                stride=1,
														
 
															-                padding=0,
														
 
															-                dilation=1,
														
 
															-                groups=1,
														
 
															-                bias_attr=False),
														
 
															-            nn.BatchNorm2D(
														
 
															-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
														
 
															-            nn.ReLU(),
														
 
															-            nn.Conv2D(
														
 
															-                inp * expand_ratio,
														
 
															-                inp * expand_ratio,
														
 
															-                kernel_size=3,
														
 
															-                stride=stride,
														
 
															-                padding=dilation,
														
 
															-                dilation=dilation,
														
 
															-                groups=inp * expand_ratio,
														
 
															-                bias_attr=False),
														
 
															-            nn.BatchNorm2D(
														
 
															-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
														
 
															-            nn.ReLU(),
														
 
															-            nn.Conv2D(
														
 
															-                inp * expand_ratio,
														
 
															-                oup,
														
 
															-                kernel_size=1,
														
 
															-                stride=1,
														
 
															-                padding=0,
														
 
															-                dilation=1,
														
 
															-                groups=1,
														
 
															-                bias_attr=False),
														
 
															-            nn.BatchNorm2D(
														
 
															-                num_features=oup, epsilon=1e-05, momentum=0.1), )
														
 
															-
														
 
															-    def forward(self, x):
														
 
															-        if self.use_res_connect:
														
 
															-            return x + self.conv(x)
														
 
															-        else:
														
 
															-            return self.conv(x)
														
 
															+            padding=padding,
														
 
															+            num_groups=num_expfilter,
														
 
															+            use_cudnn=False,
														
 
															+            name=name + "_dwise")
														
 
															+
														
 
															+        self._linear_conv = ConvBNLayer(
														
 
															+            num_channels=num_expfilter,
														
 
															+            num_filters=num_filters,
														
 
															+            filter_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            num_groups=1,
														
 
															+            name=name + "_linear")
														
 
															+
														
 
															+    def forward(self, inputs, ifshortcut):
														
 
															+        y = self._expand_conv(inputs, if_act=True)
														
 
															+        y = self._bottleneck_conv(y, if_act=True)
														
 
															+        y = self._linear_conv(y, if_act=False)
														
 
															+        if ifshortcut:
														
 
															+            y = paddle.add(inputs, y)
														
 
															+        return y
														
 
															+
														
 
															+
														
 
															+class InvresiBlocks(nn.Layer):
														
 
															+    def __init__(self, in_c, t, c, n, s, name):
														
 
															+        super(InvresiBlocks, self).__init__()
														
 
															+
														
 
															+        self._first_block = InvertedResidualUnit(
														
 
															+            num_channels=in_c,
														
 
															+            num_in_filter=in_c,
														
 
															+            num_filters=c,
														
 
															+            stride=s,
														
 
															+            filter_size=3,
														
 
															+            padding=1,
														
 
															+            expansion_factor=t,
														
 
															+            name=name + "_1")
														
 
															+
														
 
															+        self._block_list = []
														
 
															+        for i in range(1, n):
														
 
															+            block = self.add_sublayer(
														
 
															+                name + "_" + str(i + 1),
														
 
															+                sublayer=InvertedResidualUnit(
														
 
															+                    num_channels=c,
														
 
															+                    num_in_filter=c,
														
 
															+                    num_filters=c,
														
 
															+                    stride=1,
														
 
															+                    filter_size=3,
														
 
															+                    padding=1,
														
 
															+                    expansion_factor=t,
														
 
															+                    name=name + "_" + str(i + 1)))
														
 
															+            self._block_list.append(block)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        y = self._first_block(inputs, ifshortcut=False)
														
 
															+        for block in self._block_list:
														
 
															+            y = block(y, ifshortcut=True)
														
 
															+        return y
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x0_25(**kwargs):
														
 
															+    model = MobileNetV2(scale=0.25, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x0_5(**kwargs):
														
 
															+    model = MobileNetV2(scale=0.5, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x0_75(**kwargs):
														
 
															+    model = MobileNetV2(scale=0.75, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x1_0(**kwargs):
														
 
															+    model = MobileNetV2(scale=1.0, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x1_5(**kwargs):
														
 
															+    model = MobileNetV2(scale=1.5, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV2_x2_0(**kwargs):
														
 
															+    model = MobileNetV2(scale=2.0, **kwargs)
														
 
															+    return model
														
--- a/paddlers/models/ppseg/models/backbones/mobilenetv3.py
+++ b/paddlers/models/ppseg/models/backbones/mobilenetv3.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -14,10 +14,12 @@
 
															 import paddle
														
 
															 import paddle.nn as nn
														
 
															-import paddle.nn.functional as F
														
 
															+from paddle import ParamAttr
														
 
															+from paddle.regularizer import L2Decay
														
 
															+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
														
 
															 from paddlers.models.ppseg.cvlibs import manager
														
 
															-from paddlers.models.ppseg.utils import utils
														
 
															+from paddlers.models.ppseg.utils import utils, logger
														
 
															 from paddlers.models.ppseg.models import layers
														
 
															 __all__ = [
														
@@ -28,8 +30,92 @@ __all__ = [
 
															     "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
														
 
															 ]
														
 
															-
														
 
															-def make_divisible(v, divisor=8, min_value=None):
														
 
															+MODEL_STAGES_PATTERN = {
														
 
															+    "MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
														
 
															+    "MobileNetV3_large":
														
 
															+    ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
														
 
															+}
														
 
															+
														
 
															+# "large" and "small" are the configs for MobileNetV3_large and MobileNetV3_small respectively.
														
 
															+# Each "large" or "small" config is a list. Each element (itself a list) describes one depthwise block as k, exp, c, se, act, s, optionally followed by d.
														
 
															+# k: kernel_size
														
 
															+# exp: middle channel number in depthwise block
														
 
															+# c: output channel number in depthwise block
														
 
															+# se: whether to use SE block
														
 
															+# act: which activation to use
														
 
															+# s: stride in depthwise block
														
 
															+# d: dilation rate in depthwise block
														
 
															+NET_CONFIG = {
														
 
															+    "large": [
														
 
															+        # k, exp, c, se, act, s
														
 
															+        [3, 16, 16, False, "relu", 1],
														
 
															+        [3, 64, 24, False, "relu", 2],
														
 
															+        [3, 72, 24, False, "relu", 1],  # x4
														
 
															+        [5, 72, 40, True, "relu", 2],
														
 
															+        [5, 120, 40, True, "relu", 1],
														
 
															+        [5, 120, 40, True, "relu", 1],  # x8
														
 
															+        [3, 240, 80, False, "hardswish", 2],
														
 
															+        [3, 200, 80, False, "hardswish", 1],
														
 
															+        [3, 184, 80, False, "hardswish", 1],
														
 
															+        [3, 184, 80, False, "hardswish", 1],
														
 
															+        [3, 480, 112, True, "hardswish", 1],
														
 
															+        [3, 672, 112, True, "hardswish", 1],  # x16
														
 
															+        [5, 672, 160, True, "hardswish", 2],
														
 
															+        [5, 960, 160, True, "hardswish", 1],
														
 
															+        [5, 960, 160, True, "hardswish", 1],  # x32
														
 
															+    ],
														
 
															+    "small": [
														
 
															+        # k, exp, c, se, act, s
														
 
															+        [3, 16, 16, True, "relu", 2],
														
 
															+        [3, 72, 24, False, "relu", 2],
														
 
															+        [3, 88, 24, False, "relu", 1],
														
 
															+        [5, 96, 40, True, "hardswish", 2],
														
 
															+        [5, 240, 40, True, "hardswish", 1],
														
 
															+        [5, 240, 40, True, "hardswish", 1],
														
 
															+        [5, 120, 48, True, "hardswish", 1],
														
 
															+        [5, 144, 48, True, "hardswish", 1],
														
 
															+        [5, 288, 96, True, "hardswish", 2],
														
 
															+        [5, 576, 96, True, "hardswish", 1],
														
 
															+        [5, 576, 96, True, "hardswish", 1],
														
 
															+    ],
														
 
															+    "large_os8": [
														
 
															+        # k, exp, c, se, act, s, {d}
														
 
															+        [3, 16, 16, False, "relu", 1],
														
 
															+        [3, 64, 24, False, "relu", 2],
														
 
															+        [3, 72, 24, False, "relu", 1],  # x4
														
 
															+        [5, 72, 40, True, "relu", 2],
														
 
															+        [5, 120, 40, True, "relu", 1],
														
 
															+        [5, 120, 40, True, "relu", 1],  # x8
														
 
															+        [3, 240, 80, False, "hardswish", 1],
														
 
															+        [3, 200, 80, False, "hardswish", 1, 2],
														
 
															+        [3, 184, 80, False, "hardswish", 1, 2],
														
 
															+        [3, 184, 80, False, "hardswish", 1, 2],
														
 
															+        [3, 480, 112, True, "hardswish", 1, 2],
														
 
															+        [3, 672, 112, True, "hardswish", 1, 2],
														
 
															+        [5, 672, 160, True, "hardswish", 1, 2],
														
 
															+        [5, 960, 160, True, "hardswish", 1, 4],
														
 
															+        [5, 960, 160, True, "hardswish", 1, 4],
														
 
															+    ],
														
 
															+    "small_os8": [
														
 
															+        # k, exp, c, se, act, s, {d}
														
 
															+        [3, 16, 16, True, "relu", 2],
														
 
															+        [3, 72, 24, False, "relu", 2],
														
 
															+        [3, 88, 24, False, "relu", 1],
														
 
															+        [5, 96, 40, True, "hardswish", 1],
														
 
															+        [5, 240, 40, True, "hardswish", 1, 2],
														
 
															+        [5, 240, 40, True, "hardswish", 1, 2],
														
 
															+        [5, 120, 48, True, "hardswish", 1, 2],
														
 
															+        [5, 144, 48, True, "hardswish", 1, 2],
														
 
															+        [5, 288, 96, True, "hardswish", 1, 2],
														
 
															+        [5, 576, 96, True, "hardswish", 1, 4],
														
 
															+        [5, 576, 96, True, "hardswish", 1, 4],
														
 
															+    ]
														
 
															+}
														
 
															+
														
 
															+OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
														
 
															+
														
 
															+
														
 
															+def _make_divisible(v, divisor=8, min_value=None):
														
 
															     if min_value is None:
														
 
															         min_value = divisor
														
 
															     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
														
@@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
 
															     return new_v
														
 
															-class MobileNetV3(nn.Layer):
														
 
															-    """
														
 
															-    The MobileNetV3 implementation based on PaddlePaddle.
														
 
															+def _create_act(act):
														
 
															+    if act == "hardswish":
														
 
															+        return nn.Hardswish()
														
 
															+    elif act == "relu":
														
 
															+        return nn.ReLU()
														
 
															+    elif act is None:
														
 
															+        return None
														
 
															+    else:
														
 
															+        raise RuntimeError(
														
 
															+            "The activation function is not supported: {}".format(act))
														
 
															-    The original article refers to Jingdong
														
 
															-    Andrew Howard, et, al. "Searching for MobileNetV3"
														
 
															-    (https://arxiv.org/pdf/1905.02244.pdf).
														
 
															+class MobileNetV3(nn.Layer):
														
 
															+    """
														
 
															+    MobileNetV3
														
 
															     Args:
														
 
															-        pretrained (str, optional): The path of pretrained model.
														
 
															-        scale (float, optional): The scale of channels . Default: 1.0.
														
 
															-        model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Defualt: 'small'.
														
 
															-        output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
														
 
															-
														
 
															+        config: list. MobileNetV3 depthwise blocks config.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															+        scale: float=1.0. The coefficient that controls the size of network parameters. 
														
 
															+    Returns:
														
 
															+        model: nn.Layer. Specific MobileNetV3 model depends on args.
														
 
															     """
														
 
															     def __init__(self,
														
 
															-                 pretrained=None,
														
 
															+                 config,
														
 
															+                 stages_pattern,
														
 
															+                 out_index,
														
 
															+                 in_channels=3,
														
 
															                  scale=1.0,
														
 
															-                 model_name="small",
														
 
															-                 output_stride=None):
														
 
															-        super(MobileNetV3, self).__init__()
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.cfg = config
														
 
															+        self.out_index = out_index
														
 
															+        self.scale = scale
														
 
															+        self.pretrained = pretrained
														
 
															         inplanes = 16
														
 
															-        if model_name == "large":
														
 
															-            self.cfg = [
														
 
															-                # k, exp, c,  se,     nl,  s,
														
 
															-                [3, 16, 16, False, "relu", 1],
														
 
															-                [3, 64, 24, False, "relu", 2],
														
 
															-                [3, 72, 24, False, "relu", 1],  # output 1 -> out_index=2
														
 
															-                [5, 72, 40, True, "relu", 2],
														
 
															-                [5, 120, 40, True, "relu", 1],
														
 
															-                [5, 120, 40, True, "relu", 1],  # output 2 -> out_index=5
														
 
															-                [3, 240, 80, False, "hard_swish", 2],
														
 
															-                [3, 200, 80, False, "hard_swish", 1],
														
 
															-                [3, 184, 80, False, "hard_swish", 1],
														
 
															-                [3, 184, 80, False, "hard_swish", 1],
														
 
															-                [3, 480, 112, True, "hard_swish", 1],
														
 
															-                [3, 672, 112, True, "hard_swish",
														
 
															-                 1],  # output 3 -> out_index=11
														
 
															-                [5, 672, 160, True, "hard_swish", 2],
														
 
															-                [5, 960, 160, True, "hard_swish", 1],
														
 
															-                [5, 960, 160, True, "hard_swish",
														
 
															-                 1],  # output 3 -> out_index=14
														
 
															-            ]
														
 
															-            self.out_indices = [2, 5, 11, 14]
														
 
															-            self.feat_channels = [
														
 
															-                make_divisible(i * scale) for i in [24, 40, 112, 160]
														
 
															-            ]
														
 
															-
														
 
															-            self.cls_ch_squeeze = 960
														
 
															-            self.cls_ch_expand = 1280
														
 
															-        elif model_name == "small":
														
 
															-            self.cfg = [
														
 
															-                # k, exp, c,  se,     nl,  s,
														
 
															-                [3, 16, 16, True, "relu", 2],  # output 1 -> out_index=0
														
 
															-                [3, 72, 24, False, "relu", 2],
														
 
															-                [3, 88, 24, False, "relu", 1],  # output 2 -> out_index=3
														
 
															-                [5, 96, 40, True, "hard_swish", 2],
														
 
															-                [5, 240, 40, True, "hard_swish", 1],
														
 
															-                [5, 240, 40, True, "hard_swish", 1],
														
 
															-                [5, 120, 48, True, "hard_swish", 1],
														
 
															-                [5, 144, 48, True, "hard_swish", 1],  # output 3 -> out_index=7
														
 
															-                [5, 288, 96, True, "hard_swish", 2],
														
 
															-                [5, 576, 96, True, "hard_swish", 1],
														
 
															-                [5, 576, 96, True, "hard_swish", 1],  # output 4 -> out_index=10
														
 
															-            ]
														
 
															-            self.out_indices = [0, 3, 7, 10]
														
 
															-            self.feat_channels = [
														
 
															-                make_divisible(i * scale) for i in [16, 24, 48, 96]
														
 
															-            ]
														
 
															-
														
 
															-            self.cls_ch_squeeze = 576
														
 
															-            self.cls_ch_expand = 1280
														
 
															-        else:
														
 
															-            raise NotImplementedError(
														
 
															-                "mode[{}_model] is not implemented!".format(model_name))
														
 
															-
														
 
															-        ###################################################
														
 
															-        # modify stride and dilation based on output_stride
														
 
															-        self.dilation_cfg = [1] * len(self.cfg)
														
 
															-        self.modify_bottle_params(output_stride=output_stride)
														
 
															-        ###################################################
														
 
															-
														
 
															-        self.conv1 = ConvBNLayer(
														
 
															-            in_c=3,
														
 
															-            out_c=make_divisible(inplanes * scale),
														
 
															+
														
 
															+        self.conv = ConvBNLayer(
														
 
															+            in_c=in_channels,
														
 
															+            out_c=_make_divisible(inplanes * self.scale),
														
 
															             filter_size=3,
														
 
															             stride=2,
														
 
															             padding=1,
														
 
															             num_groups=1,
														
 
															             if_act=True,
														
 
															-            act="hard_swish")
														
 
															-
														
 
															-        self.block_list = []
														
 
															-
														
 
															-        inplanes = make_divisible(inplanes * scale)
														
 
															-        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
														
 
															-            ######################################
														
 
															-            # add dilation rate
														
 
															-            dilation_rate = self.dilation_cfg[i]
														
 
															-            ######################################
														
 
															-            self.block_list.append(
														
 
															-                ResidualUnit(
														
 
															-                    in_c=inplanes,
														
 
															-                    mid_c=make_divisible(scale * exp),
														
 
															-                    out_c=make_divisible(scale * c),
														
 
															-                    filter_size=k,
														
 
															-                    stride=s,
														
 
															-                    dilation=dilation_rate,
														
 
															-                    use_se=se,
														
 
															-                    act=nl,
														
 
															-                    name="conv" + str(i + 2)))
														
 
															-            self.add_sublayer(
														
 
															-                sublayer=self.block_list[-1], name="conv" + str(i + 2))
														
 
															-            inplanes = make_divisible(scale * c)
														
 
															-
														
 
															-        self.pretrained = pretrained
														
 
															+            act="hardswish")
														
 
															+        self.blocks = nn.Sequential(*[
														
 
															+            ResidualUnit(
														
 
															+                in_c=_make_divisible(inplanes * self.scale if i == 0 else
														
 
															+                                     self.cfg[i - 1][2] * self.scale),
														
 
															+                mid_c=_make_divisible(self.scale * exp),
														
 
															+                out_c=_make_divisible(self.scale * c),
														
 
															+                filter_size=k,
														
 
															+                stride=s,
														
 
															+                use_se=se,
														
 
															+                act=act,
														
 
															+                dilation=td[0] if td else 1)
														
 
															+            for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
														
 
															+        ])
														
 
															+
														
 
															+        out_channels = [config[idx][2] for idx in self.out_index]
														
 
															+        self.feat_channels = [
														
 
															+            _make_divisible(self.scale * c) for c in out_channels
														
 
															+        ]
														
 
															+
														
 
															+        self.init_res(stages_pattern)
														
 
															         self.init_weight()
														
 
															-    def modify_bottle_params(self, output_stride=None):
														
 
															-
														
 
															-        if output_stride is not None and output_stride % 2 != 0:
														
 
															-            raise ValueError("output stride must to be even number")
														
 
															-        if output_stride is not None:
														
 
															-            stride = 2
														
 
															-            rate = 1
														
 
															-            for i, _cfg in enumerate(self.cfg):
														
 
															-                stride = stride * _cfg[-1]
														
 
															-                if stride > output_stride:
														
 
															-                    rate = rate * _cfg[-1]
														
 
															-                    self.cfg[i][-1] = 1
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def init_res(self, stages_pattern, return_patterns=None,
														
 
															+                 return_stages=None):
														
 
															+        if return_patterns and return_stages:
														
 
															+            msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
														
 
															+            logger.warning(msg)
														
 
															+            return_stages = None
														
 
															+
														
 
															+        if return_stages is True:
														
 
															+            return_patterns = stages_pattern
														
 
															+        # return_stages is int or bool
														
 
															+        if type(return_stages) is int:
														
 
															+            return_stages = [return_stages]
														
 
															+        if isinstance(return_stages, list):
														
 
															+            if max(return_stages) > len(stages_pattern) or min(
														
 
															+                    return_stages) < 0:
														
 
															+                msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
														
 
															+                logger.warning(msg)
														
 
															+                return_stages = [
														
 
															+                    val for val in return_stages
														
 
															+                    if val >= 0 and val < len(stages_pattern)
														
 
															+                ]
														
 
															+            return_patterns = [stages_pattern[i] for i in return_stages]
														
 
															-                self.dilation_cfg[i] = rate
														
 
															+    def forward(self, x):
														
 
															+        x = self.conv(x)
														
 
															-    def forward(self, inputs, label=None):
														
 
															-        x = self.conv1(inputs)
														
 
															-        # A feature list saves each downsampling feature.
														
 
															         feat_list = []
														
 
															-        for i, block in enumerate(self.block_list):
														
 
															+        for idx, block in enumerate(self.blocks):
														
 
															             x = block(x)
														
 
															-            if i in self.out_indices:
														
 
															+            if idx in self.out_index:
														
 
															                 feat_list.append(x)
														
 
															         return feat_list
														
 
															-    def init_weight(self):
														
 
															-        if self.pretrained is not None:
														
 
															-            utils.load_pretrained_model(self, self.pretrained)
														
 
															-
														
 
															 class ConvBNLayer(nn.Layer):
														
 
															     def __init__(self,
														
@@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
 
															                  filter_size,
														
 
															                  stride,
														
 
															                  padding,
														
 
															-                 dilation=1,
														
 
															                  num_groups=1,
														
 
															                  if_act=True,
														
 
															-                 act=None):
														
 
															-        super(ConvBNLayer, self).__init__()
														
 
															-        self.if_act = if_act
														
 
															-        self.act = act
														
 
															+                 act=None,
														
 
															+                 dilation=1):
														
 
															+        super().__init__()
														
 
															-        self.conv = nn.Conv2D(
														
 
															+        self.conv = Conv2D(
														
 
															             in_channels=in_c,
														
 
															             out_channels=out_c,
														
 
															             kernel_size=filter_size,
														
 
															             stride=stride,
														
 
															             padding=padding,
														
 
															-            dilation=dilation,
														
 
															             groups=num_groups,
														
 
															-            bias_attr=False)
														
 
															-        self.bn = layers.SyncBatchNorm(
														
 
															-            num_features=out_c,
														
 
															-            weight_attr=paddle.ParamAttr(
														
 
															-                regularizer=paddle.regularizer.L2Decay(0.0)),
														
 
															-            bias_attr=paddle.ParamAttr(
														
 
															-                regularizer=paddle.regularizer.L2Decay(0.0)))
														
 
															-        self._act_op = layers.Activation(act='hardswish')
														
 
															+            bias_attr=False,
														
 
															+            dilation=dilation)
														
 
															+        self.bn = BatchNorm(
														
 
															+            num_channels=out_c,
														
 
															+            act=None,
														
 
															+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
														
 
															+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
														
 
															+        self.if_act = if_act
														
 
															+        self.act = _create_act(act)
														
 
															     def forward(self, x):
														
 
															         x = self.conv(x)
														
 
															         x = self.bn(x)
														
 
															         if self.if_act:
														
 
															-            x = self._act_op(x)
														
 
															+            x = self.act(x)
														
 
															         return x
														
@@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
 
															                  filter_size,
														
 
															                  stride,
														
 
															                  use_se,
														
 
															-                 dilation=1,
														
 
															                  act=None,
														
 
															-                 name=''):
														
 
															-        super(ResidualUnit, self).__init__()
														
 
															+                 dilation=1):
														
 
															+        super().__init__()
														
 
															         self.if_shortcut = stride == 1 and in_c == out_c
														
 
															         self.if_se = use_se
														
@@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
 
															             padding=0,
														
 
															             if_act=True,
														
 
															             act=act)
														
 
															-
														
 
															         self.bottleneck_conv = ConvBNLayer(
														
 
															             in_c=mid_c,
														
 
															             out_c=mid_c,
														
 
															             filter_size=filter_size,
														
 
															             stride=stride,
														
 
															-            padding='same',
														
 
															-            dilation=dilation,
														
 
															+            padding=int((filter_size - 1) // 2) * dilation,
														
 
															             num_groups=mid_c,
														
 
															             if_act=True,
														
 
															-            act=act)
														
 
															+            act=act,
														
 
															+            dilation=dilation)
														
 
															         if self.if_se:
														
 
															-            self.mid_se = SEModule(mid_c, name=name + "_se")
														
 
															+            self.mid_se = SEModule(mid_c)
														
 
															         self.linear_conv = ConvBNLayer(
														
 
															             in_c=mid_c,
														
 
															             out_c=out_c,
														
@@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
 
															             padding=0,
														
 
															             if_act=False,
														
 
															             act=None)
														
 
															-        self.dilation = dilation
														
 
															-    def forward(self, inputs):
														
 
															-        x = self.expand_conv(inputs)
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+        x = self.expand_conv(x)
														
 
															         x = self.bottleneck_conv(x)
														
 
															         if self.if_se:
														
 
															             x = self.mid_se(x)
														
 
															         x = self.linear_conv(x)
														
 
															         if self.if_shortcut:
														
 
															-            x = inputs + x
														
 
															+            x = paddle.add(identity, x)
														
 
															         return x
														
 
															+# nn.Hardsigmoid does not accept "slope" and "offset", so wrap nn.functional.hardsigmoid to pass them
														
 
															+class Hardsigmoid(nn.Layer):
														
 
															+    def __init__(self, slope=0.2, offset=0.5):
														
 
															+        super().__init__()
														
 
															+        self.slope = slope
														
 
															+        self.offset = offset
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        return nn.functional.hardsigmoid(
														
 
															+            x, slope=self.slope, offset=self.offset)
														
 
															+
														
 
															+
														
 
															 class SEModule(nn.Layer):
														
 
															-    def __init__(self, channel, reduction=4, name=""):
														
 
															-        super(SEModule, self).__init__()
														
 
															-        self.avg_pool = nn.AdaptiveAvgPool2D(1)
														
 
															-        self.conv1 = nn.Conv2D(
														
 
															+    def __init__(self, channel, reduction=4):
														
 
															+        super().__init__()
														
 
															+        self.avg_pool = AdaptiveAvgPool2D(1)
														
 
															+        self.conv1 = Conv2D(
														
 
															             in_channels=channel,
														
 
															             out_channels=channel // reduction,
														
 
															             kernel_size=1,
														
 
															             stride=1,
														
 
															             padding=0)
														
 
															-        self.conv2 = nn.Conv2D(
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.conv2 = Conv2D(
														
 
															             in_channels=channel // reduction,
														
 
															             out_channels=channel,
														
 
															             kernel_size=1,
														
 
															             stride=1,
														
 
															             padding=0)
														
 
															+        self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
														
 
															-    def forward(self, inputs):
														
 
															-        outputs = self.avg_pool(inputs)
														
 
															-        outputs = self.conv1(outputs)
														
 
															-        outputs = F.relu(outputs)
														
 
															-        outputs = self.conv2(outputs)
														
 
															-        outputs = F.hardsigmoid(outputs)
														
 
															-        return paddle.multiply(x=inputs, y=outputs)
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+        x = self.avg_pool(x)
														
 
															+        x = self.conv1(x)
														
 
															+        x = self.relu(x)
														
 
															+        x = self.conv2(x)
														
 
															+        x = self.hardsigmoid(x)
														
 
															+        return paddle.multiply(x=identity, y=x)
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_small_x0_35(**kwargs):
														
 
															-    model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small"],
														
 
															+        scale=0.35,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_small_x0_5(**kwargs):
														
 
															-    model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small"],
														
 
															+        scale=0.5,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_small_x0_75(**kwargs):
														
 
															-    model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small"],
														
 
															+        scale=0.75,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															 @manager.BACKBONES.add_component
														
 
															 def MobileNetV3_small_x1_0(**kwargs):
														
 
															-    model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small"],
														
 
															+        scale=1.0,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_small_x1_25(**kwargs):
														
 
															-    model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small"],
														
 
															+        scale=1.25,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_large_x0_35(**kwargs):
														
 
															-    model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large"],
														
 
															+        scale=0.35,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_large_x0_5(**kwargs):
														
 
															-    model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large"],
														
 
															+        scale=0.5,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_large_x0_75(**kwargs):
														
 
															-    model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large"],
														
 
															+        scale=0.75,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															 @manager.BACKBONES.add_component
														
 
															 def MobileNetV3_large_x1_0(**kwargs):
														
 
															-    model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large"],
														
 
															+        scale=1.0,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															     return model
														
 
															+@manager.BACKBONES.add_component
														
 
															 def MobileNetV3_large_x1_25(**kwargs):
														
 
															-    model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large"],
														
 
															+        scale=1.25,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV3_large_x1_0_os8(**kwargs):
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["large_os8"],
														
 
															+        scale=1.0,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
														
 
															+        out_index=OUT_INDEX["large"],
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def MobileNetV3_small_x1_0_os8(**kwargs):
														
 
															+    model = MobileNetV3(
														
 
															+        config=NET_CONFIG["small_os8"],
														
 
															+        scale=1.0,
														
 
															+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
														
 
															+        out_index=OUT_INDEX["small"],
														
 
															+        **kwargs)
														
 
															     return model
														
--- a/paddlers/models/ppseg/models/backbones/resnet_vd.py
+++ b/paddlers/models/ppseg/models/backbones/resnet_vd.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
 
															         layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
														
 
															         output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
														
 
															         multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         pretrained (str, optional): The path of pretrained model.
														
 
															     """
														
 
															     def __init__(self,
														
 
															-                 input_channel=3,
														
 
															                  layers=50,
														
 
															                  output_stride=8,
														
 
															                  multi_grid=(1, 1, 1),
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None,
														
 
															                  data_format='NCHW'):
														
 
															         super(ResNet_vd, self).__init__()
														
@@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
 
															             dilation_dict = {3: 2}
														
 
															         self.conv1_1 = ConvBNLayer(
														
 
															-            in_channels=input_channel,
														
 
															+            in_channels=in_channels,
														
 
															             out_channels=32,
														
 
															             kernel_size=3,
														
 
															             stride=2,
														
--- a/paddlers/models/ppseg/models/backbones/shufflenetv2.py
+++ b/paddlers/models/ppseg/models/backbones/shufflenetv2.py
@@ -0,0 +1,315 @@
 
															+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+from paddle import ParamAttr, reshape, transpose, concat, split
														
 
															+from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
														
 
															+from paddle.nn.initializer import KaimingNormal
														
 
															+from paddle.nn.functional import swish
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.utils import utils, logger
														
 
															+
														
 
															+__all__ = [
														
 
															+    'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
														
 
															+    'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
														
 
															+    'ShuffleNetV2_swish'
														
 
															+]
														
 
															+
														
 
															+
														
 
															+def channel_shuffle(x, groups):
														
 
															+    x_shape = paddle.shape(x)
														
 
															+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
														
 
															+    num_channels = x.shape[1]
														
 
															+    channels_per_group = num_channels // groups
														
 
															+
														
 
															+    # reshape
														
 
															+    x = reshape(
														
 
															+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
														
 
															+
														
 
															+    # transpose
														
 
															+    x = transpose(x=x, perm=[0, 2, 1, 3, 4])
														
 
															+
														
 
															+    # flatten
														
 
															+    x = reshape(x=x, shape=[batch_size, num_channels, height, width])
														
 
															+
														
 
															+    return x
														
 
															+
														
 
															+
														
 
															+class ConvBNLayer(Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            out_channels,
														
 
															+            kernel_size,
														
 
															+            stride,
														
 
															+            padding,
														
 
															+            groups=1,
														
 
															+            act=None,
														
 
															+            name=None, ):
														
 
															+        super(ConvBNLayer, self).__init__()
														
 
															+        self._conv = Conv2D(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            stride=stride,
														
 
															+            padding=padding,
														
 
															+            groups=groups,
														
 
															+            weight_attr=ParamAttr(
														
 
															+                initializer=KaimingNormal(), name=name + "_weights"),
														
 
															+            bias_attr=False)
														
 
															+
														
 
															+        self._batch_norm = BatchNorm(
														
 
															+            out_channels,
														
 
															+            param_attr=ParamAttr(name=name + "_bn_scale"),
														
 
															+            bias_attr=ParamAttr(name=name + "_bn_offset"),
														
 
															+            act=act,
														
 
															+            moving_mean_name=name + "_bn_mean",
														
 
															+            moving_variance_name=name + "_bn_variance")
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        y = self._conv(inputs)
														
 
															+        y = self._batch_norm(y)
														
 
															+        return y
														
 
															+
														
 
															+
														
 
															+class InvertedResidual(Layer):
														
 
															+    def __init__(self, in_channels, out_channels, stride, act="relu",
														
 
															+                 name=None):
														
 
															+        super(InvertedResidual, self).__init__()
														
 
															+        self._conv_pw = ConvBNLayer(
														
 
															+            in_channels=in_channels // 2,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            act=act,
														
 
															+            name='stage_' + name + '_conv1')
														
 
															+        self._conv_dw = ConvBNLayer(
														
 
															+            in_channels=out_channels // 2,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=3,
														
 
															+            stride=stride,
														
 
															+            padding=1,
														
 
															+            groups=out_channels // 2,
														
 
															+            act=None,
														
 
															+            name='stage_' + name + '_conv2')
														
 
															+        self._conv_linear = ConvBNLayer(
														
 
															+            in_channels=out_channels // 2,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            act=act,
														
 
															+            name='stage_' + name + '_conv3')
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        x1, x2 = split(
														
 
															+            inputs,
														
 
															+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
														
 
															+            axis=1)
														
 
															+        x2 = self._conv_pw(x2)
														
 
															+        x2 = self._conv_dw(x2)
														
 
															+        x2 = self._conv_linear(x2)
														
 
															+        out = concat([x1, x2], axis=1)
														
 
															+        return channel_shuffle(out, 2)
														
 
															+
														
 
															+
														
 
															+class InvertedResidualDS(Layer):
														
 
															+    def __init__(self, in_channels, out_channels, stride, act="relu",
														
 
															+                 name=None):
														
 
															+        super(InvertedResidualDS, self).__init__()
														
 
															+
														
 
															+        # branch1
														
 
															+        self._conv_dw_1 = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=in_channels,
														
 
															+            kernel_size=3,
														
 
															+            stride=stride,
														
 
															+            padding=1,
														
 
															+            groups=in_channels,
														
 
															+            act=None,
														
 
															+            name='stage_' + name + '_conv4')
														
 
															+        self._conv_linear_1 = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            act=act,
														
 
															+            name='stage_' + name + '_conv5')
														
 
															+        # branch2
														
 
															+        self._conv_pw_2 = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            act=act,
														
 
															+            name='stage_' + name + '_conv1')
														
 
															+        self._conv_dw_2 = ConvBNLayer(
														
 
															+            in_channels=out_channels // 2,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=3,
														
 
															+            stride=stride,
														
 
															+            padding=1,
														
 
															+            groups=out_channels // 2,
														
 
															+            act=None,
														
 
															+            name='stage_' + name + '_conv2')
														
 
															+        self._conv_linear_2 = ConvBNLayer(
														
 
															+            in_channels=out_channels // 2,
														
 
															+            out_channels=out_channels // 2,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            act=act,
														
 
															+            name='stage_' + name + '_conv3')
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        x1 = self._conv_dw_1(inputs)
														
 
															+        x1 = self._conv_linear_1(x1)
														
 
															+        x2 = self._conv_pw_2(inputs)
														
 
															+        x2 = self._conv_dw_2(x2)
														
 
															+        x2 = self._conv_linear_2(x2)
														
 
															+        out = concat([x1, x2], axis=1)
														
 
															+
														
 
															+        return channel_shuffle(out, 2)
														
 
															+
														
 
															+
														
 
															+class ShuffleNet(Layer):
														
 
															+    def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
														
 
															+        super(ShuffleNet, self).__init__()
														
 
															+        self.scale = scale
														
 
															+        self.pretrained = pretrained
														
 
															+        stage_repeats = [4, 8, 4]
														
 
															+
														
 
															+        if scale == 0.25:
														
 
															+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
														
 
															+        elif scale == 0.33:
														
 
															+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
														
 
															+        elif scale == 0.5:
														
 
															+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
														
 
															+        elif scale == 1.0:
														
 
															+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
														
 
															+        elif scale == 1.5:
														
 
															+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
														
 
															+        elif scale == 2.0:
														
 
															+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
														
 
															+        else:
														
 
															+            raise NotImplementedError("This scale size:[" + str(scale) +
														
 
															+                                      "] is not implemented!")
														
 
															+
														
 
															+        self.out_index = [3, 11, 15]
														
 
															+        self.feat_channels = stage_out_channels[1:5]
														
 
															+
														
 
															+        # 1. conv1
														
 
															+        self._conv1 = ConvBNLayer(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=stage_out_channels[1],
														
 
															+            kernel_size=3,
														
 
															+            stride=2,
														
 
															+            padding=1,
														
 
															+            act=act,
														
 
															+            name='stage1_conv')
														
 
															+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
														
 
															+
														
 
															+        # 2. bottleneck sequences
														
 
															+        self._block_list = []
														
 
															+        for stage_id, num_repeat in enumerate(stage_repeats):
														
 
															+            for i in range(num_repeat):
														
 
															+                if i == 0:
														
 
															+                    block = self.add_sublayer(
														
 
															+                        name=str(stage_id + 2) + '_' + str(i + 1),
														
 
															+                        sublayer=InvertedResidualDS(
														
 
															+                            in_channels=stage_out_channels[stage_id + 1],
														
 
															+                            out_channels=stage_out_channels[stage_id + 2],
														
 
															+                            stride=2,
														
 
															+                            act=act,
														
 
															+                            name=str(stage_id + 2) + '_' + str(i + 1)))
														
 
															+                else:
														
 
															+                    block = self.add_sublayer(
														
 
															+                        name=str(stage_id + 2) + '_' + str(i + 1),
														
 
															+                        sublayer=InvertedResidual(
														
 
															+                            in_channels=stage_out_channels[stage_id + 2],
														
 
															+                            out_channels=stage_out_channels[stage_id + 2],
														
 
															+                            stride=1,
														
 
															+                            act=act,
														
 
															+                            name=str(stage_id + 2) + '_' + str(i + 1)))
														
 
															+                self._block_list.append(block)
														
 
															+
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        feat_list = []
														
 
															+
														
 
															+        y = self._conv1(inputs)
														
 
															+        y = self._max_pool(y)
														
 
															+        feat_list.append(y)
														
 
															+
														
 
															+        for idx, inv in enumerate(self._block_list):
														
 
															+            y = inv(y)
														
 
															+            if idx in self.out_index:
														
 
															+                feat_list.append(y)
														
 
															+        return feat_list
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x0_25(**kwargs):
														
 
															+    model = ShuffleNet(scale=0.25, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x0_33(**kwargs):
														
 
															+    model = ShuffleNet(scale=0.33, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x0_5(**kwargs):
														
 
															+    model = ShuffleNet(scale=0.5, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x1_0(**kwargs):
														
 
															+    model = ShuffleNet(scale=1.0, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x1_5(**kwargs):
														
 
															+    model = ShuffleNet(scale=1.5, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_x2_0(**kwargs):
														
 
															+    model = ShuffleNet(scale=2.0, **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def ShuffleNetV2_swish(**kwargs):
														
 
															+    model = ShuffleNet(scale=1.0, act="swish", **kwargs)
														
 
															+    return model
														
--- a/paddlers/models/ppseg/models/backbones/stdcnet.py
+++ b/paddlers/models/ppseg/models/backbones/stdcnet.py
@@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
 
															         layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3].
														
 
															         block_num(int,optional): block_num of features block. Default: 4.
														
 
															         type(str,optional): feature fusion method "cat"/"add". Default: "cat".
														
 
															-        num_classes(int, optional): class number for image classification. Default: 1000.
														
 
															-        dropout(float,optional): dropout ratio. if >0,use dropout ratio.  Default: 0.20.
														
 
															-        use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False.
														
 
															+        relative_lr(float,optional): parameters here receive a different learning rate when updating. The effective 
														
 
															+            learning rate is the product of relative_lr and the global learning rate. Default: 1.0. 
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         pretrained(str, optional): the path of pretrained model.
														
 
															     """
														
@@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
 
															                  layers=[4, 5, 3],
														
 
															                  block_num=4,
														
 
															                  type="cat",
														
 
															-                 num_classes=1000,
														
 
															-                 dropout=0.20,
														
 
															-                 use_conv_last=False,
														
 
															+                 relative_lr=1.0,
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None):
														
 
															         super(STDCNet, self).__init__()
														
 
															         if type == "cat":
														
 
															             block = CatBottleneck
														
 
															         elif type == "add":
														
 
															             block = AddBottleneck
														
 
															-        self.use_conv_last = use_conv_last
														
 
															-        self.features = self._make_layers(base, layers, block_num, block)
														
 
															-        self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
														
 
															-
														
 
															-        if (layers == [4, 5, 3]):  #stdc1446
														
 
															-            self.x2 = nn.Sequential(self.features[:1])
														
 
															-            self.x4 = nn.Sequential(self.features[1:2])
														
 
															-            self.x8 = nn.Sequential(self.features[2:6])
														
 
															-            self.x16 = nn.Sequential(self.features[6:11])
														
 
															-            self.x32 = nn.Sequential(self.features[11:])
														
 
															-        elif (layers == [2, 2, 2]):  #stdc813
														
 
															-            self.x2 = nn.Sequential(self.features[:1])
														
 
															-            self.x4 = nn.Sequential(self.features[1:2])
														
 
															-            self.x8 = nn.Sequential(self.features[2:4])
														
 
															-            self.x16 = nn.Sequential(self.features[4:6])
														
 
															-            self.x32 = nn.Sequential(self.features[6:])
														
 
															-        else:
														
 
															-            raise NotImplementedError(
														
 
															-                "model with layers:{} is not implemented!".format(layers))
														
 
															+        self.layers = layers
														
 
															+        self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
														
 
															+        self.features = self._make_layers(in_channels, base, layers, block_num,
														
 
															+                                          block, relative_lr)
														
 
															         self.pretrained = pretrained
														
 
															         self.init_weight()
														
@@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
 
															         """
														
 
															         forward function for feature extract.
														
 
															         """
														
 
															-        feat2 = self.x2(x)
														
 
															-        feat4 = self.x4(feat2)
														
 
															-        feat8 = self.x8(feat4)
														
 
															-        feat16 = self.x16(feat8)
														
 
															-        feat32 = self.x32(feat16)
														
 
															-        if self.use_conv_last:
														
 
															-            feat32 = self.conv_last(feat32)
														
 
															-        return feat2, feat4, feat8, feat16, feat32
														
 
															-
														
 
															-    def _make_layers(self, base, layers, block_num, block):
														
 
															+        out_feats = []
														
 
															+
														
 
															+        x = self.features[0](x)
														
 
															+        out_feats.append(x)
														
 
															+        x = self.features[1](x)
														
 
															+        out_feats.append(x)
														
 
															+
														
 
															+        idx = [[2, 2 + self.layers[0]],
														
 
															+               [2 + self.layers[0], 2 + sum(self.layers[0:2])],
														
 
															+               [2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
														
 
															+        for start_idx, end_idx in idx:
														
 
															+            for i in range(start_idx, end_idx):
														
 
															+                x = self.features[i](x)
														
 
															+            out_feats.append(x)
														
 
															+
														
 
															+        return out_feats
														
 
															+
														
 
															+    def _make_layers(self, in_channels, base, layers, block_num, block,
														
 
															+                     relative_lr):
														
 
															         features = []
														
 
															-        features += [ConvBNRelu(3, base // 2, 3, 2)]
														
 
															-        features += [ConvBNRelu(base // 2, base, 3, 2)]
														
 
															+        features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
														
 
															+        features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
														
 
															         for i, layer in enumerate(layers):
														
 
															             for j in range(layer):
														
 
															                 if i == 0 and j == 0:
														
 
															-                    features.append(block(base, base * 4, block_num, 2))
														
 
															+                    features.append(
														
 
															+                        block(base, base * 4, block_num, 2, relative_lr))
														
 
															                 elif j == 0:
														
 
															                     features.append(
														
 
															                         block(base * int(math.pow(2, i + 1)), base * int(
														
 
															-                            math.pow(2, i + 2)), block_num, 2))
														
 
															+                            math.pow(2, i + 2)), block_num, 2, relative_lr))
														
 
															                 else:
														
 
															                     features.append(
														
 
															                         block(base * int(math.pow(2, i + 2)), base * int(
														
 
															-                            math.pow(2, i + 2)), block_num, 1))
														
 
															+                            math.pow(2, i + 2)), block_num, 1, relative_lr))
														
 
															         return nn.Sequential(*features)
														
@@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
 
															 class ConvBNRelu(nn.Layer):
														
 
															-    def __init__(self, in_planes, out_planes, kernel=3, stride=1):
														
 
															+    def __init__(self,
														
 
															+                 in_planes,
														
 
															+                 out_planes,
														
 
															+                 kernel=3,
														
 
															+                 stride=1,
														
 
															+                 relative_lr=1.0):
														
 
															         super(ConvBNRelu, self).__init__()
														
 
															+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
														
 
															         self.conv = nn.Conv2D(
														
 
															             in_planes,
														
 
															             out_planes,
														
 
															             kernel_size=kernel,
														
 
															             stride=stride,
														
 
															             padding=kernel // 2,
														
 
															+            weight_attr=param_attr,
														
 
															             bias_attr=False)
														
 
															-        self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
														
 
															+        self.bn = nn.BatchNorm2D(
														
 
															+            out_planes, weight_attr=param_attr, bias_attr=param_attr)
														
 
															         self.relu = nn.ReLU()
														
 
															     def forward(self, x):
														
@@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
 
															 class AddBottleneck(nn.Layer):
														
 
															-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
														
 
															+    def __init__(self,
														
 
															+                 in_planes,
														
 
															+                 out_planes,
														
 
															+                 block_num=3,
														
 
															+                 stride=1,
														
 
															+                 relative_lr=1.0):
														
 
															         super(AddBottleneck, self).__init__()
														
 
															         assert block_num > 1, "block number should be larger than 1."
														
 
															         self.conv_list = nn.LayerList()
														
 
															         self.stride = stride
														
 
															+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
														
 
															         if stride == 2:
														
 
															             self.avd_layer = nn.Sequential(
														
 
															                 nn.Conv2D(
														
@@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
 
															                     stride=2,
														
 
															                     padding=1,
														
 
															                     groups=out_planes // 2,
														
 
															+                    weight_attr=param_attr,
														
 
															                     bias_attr=False),
														
 
															-                nn.BatchNorm2D(out_planes // 2), )
														
 
															+                nn.BatchNorm2D(
														
 
															+                    out_planes // 2,
														
 
															+                    weight_attr=param_attr,
														
 
															+                    bias_attr=param_attr), )
														
 
															             self.skip = nn.Sequential(
														
 
															                 nn.Conv2D(
														
 
															                     in_planes,
														
@@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
 
															                     stride=2,
														
 
															                     padding=1,
														
 
															                     groups=in_planes,
														
 
															+                    weight_attr=param_attr,
														
 
															                     bias_attr=False),
														
 
															-                nn.BatchNorm2D(in_planes),
														
 
															+                nn.BatchNorm2D(
														
 
															+                    in_planes, weight_attr=param_attr, bias_attr=param_attr),
														
 
															                 nn.Conv2D(
														
 
															-                    in_planes, out_planes, kernel_size=1, bias_attr=False),
														
 
															-                nn.BatchNorm2D(out_planes), )
														
 
															+                    in_planes,
														
 
															+                    out_planes,
														
 
															+                    kernel_size=1,
														
 
															+                    bias_attr=False,
														
 
															+                    weight_attr=param_attr),
														
 
															+                nn.BatchNorm2D(
														
 
															+                    out_planes, weight_attr=param_attr, bias_attr=param_attr), )
														
 
															             stride = 1
														
 
															         for idx in range(block_num):
														
 
															             if idx == 0:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        in_planes, out_planes // 2, kernel=1))
														
 
															+                        in_planes,
														
 
															+                        out_planes // 2,
														
 
															+                        kernel=1,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx == 1 and block_num == 2:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        out_planes // 2, out_planes // 2, stride=stride))
														
 
															+                        out_planes // 2,
														
 
															+                        out_planes // 2,
														
 
															+                        stride=stride,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx == 1 and block_num > 2:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        out_planes // 2, out_planes // 4, stride=stride))
														
 
															+                        out_planes // 2,
														
 
															+                        out_planes // 4,
														
 
															+                        stride=stride,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx < block_num - 1:
														
 
															                 self.conv_list.append(
														
 
															-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
														
 
															-                               // int(math.pow(2, idx + 1))))
														
 
															+                    ConvBNRelu(
														
 
															+                        out_planes // int(math.pow(2, idx)),
														
 
															+                        out_planes // int(math.pow(2, idx + 1)),
														
 
															+                        relative_lr=relative_lr))
														
 
															             else:
														
 
															                 self.conv_list.append(
														
 
															-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
														
 
															-                               // int(math.pow(2, idx))))
														
 
															+                    ConvBNRelu(out_planes // int(math.pow(2, idx)),
														
 
															+                               out_planes // int(math.pow(2, idx)),
														
 
															+                               relative_lr=relative_lr))
														
 
															     def forward(self, x):
														
 
															         out_list = []
														
@@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
 
															 class CatBottleneck(nn.Layer):
														
 
															-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
														
 
															+    def __init__(self,
														
 
															+                 in_planes,
														
 
															+                 out_planes,
														
 
															+                 block_num=3,
														
 
															+                 stride=1,
														
 
															+                 relative_lr=1.0):
														
 
															         super(CatBottleneck, self).__init__()
														
 
															         assert block_num > 1, "block number should be larger than 1."
														
 
															         self.conv_list = nn.LayerList()
														
 
															         self.stride = stride
														
 
															+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
														
 
															         if stride == 2:
														
 
															             self.avd_layer = nn.Sequential(
														
 
															                 nn.Conv2D(
														
@@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
 
															                     stride=2,
														
 
															                     padding=1,
														
 
															                     groups=out_planes // 2,
														
 
															+                    weight_attr=param_attr,
														
 
															                     bias_attr=False),
														
 
															-                nn.BatchNorm2D(out_planes // 2), )
														
 
															+                nn.BatchNorm2D(
														
 
															+                    out_planes // 2,
														
 
															+                    weight_attr=param_attr,
														
 
															+                    bias_attr=param_attr), )
														
 
															             self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
														
 
															             stride = 1
														
@@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
 
															             if idx == 0:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        in_planes, out_planes // 2, kernel=1))
														
 
															+                        in_planes,
														
 
															+                        out_planes // 2,
														
 
															+                        kernel=1,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx == 1 and block_num == 2:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        out_planes // 2, out_planes // 2, stride=stride))
														
 
															+                        out_planes // 2,
														
 
															+                        out_planes // 2,
														
 
															+                        stride=stride,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx == 1 and block_num > 2:
														
 
															                 self.conv_list.append(
														
 
															                     ConvBNRelu(
														
 
															-                        out_planes // 2, out_planes // 4, stride=stride))
														
 
															+                        out_planes // 2,
														
 
															+                        out_planes // 4,
														
 
															+                        stride=stride,
														
 
															+                        relative_lr=relative_lr))
														
 
															             elif idx < block_num - 1:
														
 
															                 self.conv_list.append(
														
 
															-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
														
 
															-                               // int(math.pow(2, idx + 1))))
														
 
															+                    ConvBNRelu(
														
 
															+                        out_planes // int(math.pow(2, idx)),
														
 
															+                        out_planes // int(math.pow(2, idx + 1)),
														
 
															+                        relative_lr=relative_lr))
														
 
															             else:
														
 
															                 self.conv_list.append(
														
 
															-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
														
 
															-                               // int(math.pow(2, idx))))
														
 
															+                    ConvBNRelu(
														
 
															+                        out_planes // int(math.pow(2, idx)),
														
 
															+                        out_planes // int(math.pow(2, idx)),
														
 
															+                        relative_lr=relative_lr))
														
 
															     def forward(self, x):
														
 
															         out_list = []
														
--- a/paddlers/models/ppseg/models/backbones/swin_transformer.py
+++ b/paddlers/models/ppseg/models/backbones/swin_transformer.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
 
															     Args:
														
 
															         pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
														
 
															         patch_size (int | tuple(int)): Patch size. Default: 4.
														
 
															-        in_chans (int): Number of input image channels. Default: 3.
														
 
															+        in_channels (int): Number of input image channels. Default: 3.
														
 
															         embed_dim (int): Number of linear projection output channels. Default: 96.
														
 
															         depths (tuple[int]): Depths of each Swin Transformer stage.
														
 
															         num_heads (tuple[int]): Number of attention head of each stage.
														
@@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
 
															     def __init__(self,
														
 
															                  pretrain_img_size=224,
														
 
															                  patch_size=4,
														
 
															-                 in_chans=3,
														
 
															+                 in_channels=3,
														
 
															                  embed_dim=96,
														
 
															                  depths=[2, 2, 6, 2],
														
 
															                  num_heads=[3, 6, 12, 24],
														
@@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
 
															         # split image into non-overlapping patches
														
 
															         self.patch_embed = PatchEmbed(
														
 
															             patch_size=patch_size,
														
 
															-            in_chans=in_chans,
														
 
															+            in_chans=in_channels,
														
 
															             embed_dim=embed_dim,
														
 
															             norm_layer=norm_layer if self.patch_norm else None)
														
--- a/paddlers/models/ppseg/models/backbones/top_transformer.py
+++ b/paddlers/models/ppseg/models/backbones/top_transformer.py
@@ -0,0 +1,716 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+"""
														
 
															+This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
														
 
															+"""
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg import utils
														
 
															+from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath
														
 
															+
														
 
															+__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
														
 
															+
														
 
															+
														
 
															+def make_divisible(val, divisor, min_value=None):
														
 
															+    """
														
 
															+    This function is taken from the original tf repo.
														
 
															+    It ensures that all layers have a channel number that is divisible by 8
														
 
															+    It can be seen here:
														
 
															+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
														
 
															+    """
														
 
															+    if min_value is None:
														
 
															+        min_value = divisor
														
 
															+    new_v = max(min_value, int(val + divisor / 2) // divisor * divisor)
														
 
															+    # Make sure that round down does not go down by more than 10%.
														
 
															+    if new_v < 0.9 * val:
														
 
															+        new_v += divisor
														
 
															+    return new_v
														
 
															+
														
 
															+
														
 
															+class HSigmoid(nn.Layer):
														
 
															+    def __init__(self, inplace=True):
														
 
															+        super().__init__()
														
 
															+        self.relu = nn.ReLU6()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        return self.relu(x + 3) / 6
														
 
															+
														
 
															+
														
 
															+class Conv2DBN(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 ks=1,
														
 
															+                 stride=1,
														
 
															+                 pad=0,
														
 
															+                 dilation=1,
														
 
															+                 groups=1,
														
 
															+                 bn_weight_init=1,
														
 
															+                 lr_mult=1.0):
														
 
															+        super().__init__()
														
 
															+        conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
														
 
															+        self.c = nn.Conv2D(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=ks,
														
 
															+            stride=stride,
														
 
															+            padding=pad,
														
 
															+            dilation=dilation,
														
 
															+            groups=groups,
														
 
															+            weight_attr=conv_weight_attr,
														
 
															+            bias_attr=False)
														
 
															+        bn_weight_attr = paddle.ParamAttr(
														
 
															+            initializer=nn.initializer.Constant(bn_weight_init),
														
 
															+            learning_rate=lr_mult)
														
 
															+        bn_bias_attr = paddle.ParamAttr(
														
 
															+            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
														
 
															+        self.bn = nn.BatchNorm2D(
														
 
															+            out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        out = self.c(inputs)
														
 
															+        out = self.bn(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class ConvBNAct(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 kernel_size=1,
														
 
															+                 stride=1,
														
 
															+                 padding=0,
														
 
															+                 groups=1,
														
 
															+                 norm=nn.BatchNorm2D,
														
 
															+                 act=None,
														
 
															+                 bias_attr=False,
														
 
															+                 lr_mult=1.0):
														
 
															+        super(ConvBNAct, self).__init__()
														
 
															+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            stride=stride,
														
 
															+            padding=padding,
														
 
															+            groups=groups,
														
 
															+            weight_attr=param_attr,
														
 
															+            bias_attr=param_attr if bias_attr else False)
														
 
															+        self.act = act() if act is not None else Identity()
														
 
															+        self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \
														
 
															+            if norm is not None else Identity()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.conv(x)
														
 
															+        x = self.bn(x)
														
 
															+        x = self.act(x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class MLP(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_features,
														
 
															+                 hidden_features=None,
														
 
															+                 out_features=None,
														
 
															+                 act_layer=nn.ReLU,
														
 
															+                 drop=0.,
														
 
															+                 lr_mult=1.0):
														
 
															+        super().__init__()
														
 
															+        out_features = out_features or in_features
														
 
															+        hidden_features = hidden_features or in_features
														
 
															+        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
														
 
															+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
														
 
															+        self.dwconv = nn.Conv2D(
														
 
															+            hidden_features,
														
 
															+            hidden_features,
														
 
															+            3,
														
 
															+            1,
														
 
															+            1,
														
 
															+            groups=hidden_features,
														
 
															+            weight_attr=param_attr,
														
 
															+            bias_attr=param_attr)
														
 
															+        self.act = act_layer()
														
 
															+        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
														
 
															+        self.drop = nn.Dropout(drop)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.fc1(x)
														
 
															+        x = self.dwconv(x)
														
 
															+        x = self.act(x)
														
 
															+        x = self.drop(x)
														
 
															+        x = self.fc2(x)
														
 
															+        x = self.drop(x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class InvertedResidual(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 kernel_size,
														
 
															+                 stride,
														
 
															+                 expand_ratio,
														
 
															+                 activations=None,
														
 
															+                 lr_mult=1.0):
														
 
															+        super(InvertedResidual, self).__init__()
														
 
															+        assert stride in [1, 2], "The stride should be 1 or 2."
														
 
															+
														
 
															+        if activations is None:
														
 
															+            activations = nn.ReLU
														
 
															+
														
 
															+        hidden_dim = int(round(in_channels * expand_ratio))
														
 
															+        self.use_res_connect = stride == 1 and in_channels == out_channels
														
 
															+
														
 
															+        layers = []
														
 
															+        if expand_ratio != 1:
														
 
															+            layers.append(
														
 
															+                Conv2DBN(
														
 
															+                    in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
														
 
															+            layers.append(activations())
														
 
															+        layers.extend([
														
 
															+            Conv2DBN(
														
 
															+                hidden_dim,
														
 
															+                hidden_dim,
														
 
															+                ks=kernel_size,
														
 
															+                stride=stride,
														
 
															+                pad=kernel_size // 2,
														
 
															+                groups=hidden_dim,
														
 
															+                lr_mult=lr_mult), activations(), Conv2DBN(
														
 
															+                    hidden_dim, out_channels, ks=1, lr_mult=lr_mult)
														
 
															+        ])
														
 
															+        self.conv = nn.Sequential(*layers)
														
 
															+        self.out_channels = out_channels
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        if self.use_res_connect:
														
 
															+            return x + self.conv(x)
														
 
															+        else:
														
 
															+            return self.conv(x)
														
 
															+
														
 
															+
														
 
															+class TokenPyramidModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 cfgs,
														
 
															+                 out_indices,
														
 
															+                 in_channels=3,
														
 
															+                 inp_channel=16,
														
 
															+                 activation=nn.ReLU,
														
 
															+                 width_mult=1.,
														
 
															+                 lr_mult=1.):
														
 
															+        super().__init__()
														
 
															+        self.out_indices = out_indices
														
 
															+
														
 
															+        self.stem = nn.Sequential(
														
 
															+            Conv2DBN(
														
 
															+                in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
														
 
															+            activation())
														
 
															+
														
 
															+        self.layers = []
														
 
															+        for i, (k, t, c, s) in enumerate(cfgs):
														
 
															+            output_channel = make_divisible(c * width_mult, 8)
														
 
															+            exp_size = t * inp_channel
														
 
															+            exp_size = make_divisible(exp_size * width_mult, 8)
														
 
															+            layer_name = 'layer{}'.format(i + 1)
														
 
															+            layer = InvertedResidual(
														
 
															+                inp_channel,
														
 
															+                output_channel,
														
 
															+                kernel_size=k,
														
 
															+                stride=s,
														
 
															+                expand_ratio=t,
														
 
															+                activations=activation,
														
 
															+                lr_mult=lr_mult)
														
 
															+            self.add_sublayer(layer_name, layer)
														
 
															+            self.layers.append(layer_name)
														
 
															+            inp_channel = output_channel
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        outs = []
														
 
															+        x = self.stem(x)
														
 
															+        for i, layer_name in enumerate(self.layers):
														
 
															+            layer = getattr(self, layer_name)
														
 
															+            x = layer(x)
														
 
															+            if i in self.out_indices:
														
 
															+                outs.append(x)
														
 
															+        return outs
														
 
															+
														
 
															+
														
 
															+class Attention(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 dim,
														
 
															+                 key_dim,
														
 
															+                 num_heads,
														
 
															+                 attn_ratio=4,
														
 
															+                 activation=None,
														
 
															+                 lr_mult=1.0):
														
 
															+        super().__init__()
														
 
															+        self.num_heads = num_heads
														
 
															+        self.scale = key_dim**-0.5
														
 
															+        self.key_dim = key_dim
														
 
															+        self.nh_kd = nh_kd = key_dim * num_heads
														
 
															+        self.d = int(attn_ratio * key_dim)
														
 
															+        self.dh = int(attn_ratio * key_dim) * num_heads
														
 
															+        self.attn_ratio = attn_ratio
														
 
															+
														
 
															+        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
														
 
															+        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
														
 
															+        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
														
 
															+
														
 
															+        self.proj = nn.Sequential(
														
 
															+            activation(),
														
 
															+            Conv2DBN(
														
 
															+                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_shape = paddle.shape(x)
														
 
															+        H, W = x_shape[2], x_shape[3]
														
 
															+
														
 
															+        qq = self.to_q(x).reshape(
														
 
															+            [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
														
 
															+        kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
														
 
															+        vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
														
 
															+            [0, 1, 3, 2])
														
 
															+
														
 
															+        attn = paddle.matmul(qq, kk)
														
 
															+        attn = F.softmax(attn, axis=-1)
														
 
															+
														
 
															+        xx = paddle.matmul(attn, vv)
														
 
															+
														
 
															+        xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
														
 
															+        xx = self.proj(xx)
														
 
															+        return xx
														
 
															+
														
 
															+
														
 
															+class Block(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 dim,
														
 
															+                 key_dim,
														
 
															+                 num_heads,
														
 
															+                 mlp_ratios=4.,
														
 
															+                 attn_ratio=2.,
														
 
															+                 drop=0.,
														
 
															+                 drop_path=0.,
														
 
															+                 act_layer=nn.ReLU,
														
 
															+                 lr_mult=1.0):
														
 
															+        super().__init__()
														
 
															+        self.dim = dim
														
 
															+        self.num_heads = num_heads
														
 
															+        self.mlp_ratios = mlp_ratios
														
 
															+
														
 
															+        self.attn = Attention(
														
 
															+            dim,
														
 
															+            key_dim=key_dim,
														
 
															+            num_heads=num_heads,
														
 
															+            attn_ratio=attn_ratio,
														
 
															+            activation=act_layer,
														
 
															+            lr_mult=lr_mult)
														
 
															+
														
 
															+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
														
 
															+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
														
 
															+        mlp_hidden_dim = int(dim * mlp_ratios)
														
 
															+        self.mlp = MLP(in_features=dim,
														
 
															+                       hidden_features=mlp_hidden_dim,
														
 
															+                       act_layer=act_layer,
														
 
															+                       drop=drop,
														
 
															+                       lr_mult=lr_mult)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        h = x
														
 
															+        x = self.attn(x)
														
 
															+        x = self.drop_path(x)
														
 
															+        x = h + x
														
 
															+
														
 
															+        h = x
														
 
															+        x = self.mlp(x)
														
 
															+        x = self.drop_path(x)
														
 
															+        x = x + h
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class BasicLayer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 block_num,
														
 
															+                 embedding_dim,
														
 
															+                 key_dim,
														
 
															+                 num_heads,
														
 
															+                 mlp_ratios=4.,
														
 
															+                 attn_ratio=2.,
														
 
															+                 drop=0.,
														
 
															+                 attn_drop=0.,
														
 
															+                 drop_path=0.,
														
 
															+                 act_layer=None,
														
 
															+                 lr_mult=1.0):
														
 
															+        super().__init__()
														
 
															+        self.block_num = block_num
														
 
															+
														
 
															+        self.transformer_blocks = nn.LayerList()
														
 
															+        for i in range(self.block_num):
														
 
															+            self.transformer_blocks.append(
														
 
															+                Block(
														
 
															+                    embedding_dim,
														
 
															+                    key_dim=key_dim,
														
 
															+                    num_heads=num_heads,
														
 
															+                    mlp_ratios=mlp_ratios,
														
 
															+                    attn_ratio=attn_ratio,
														
 
															+                    drop=drop,
														
 
															+                    drop_path=drop_path[i]
														
 
															+                    if isinstance(drop_path, list) else drop_path,
														
 
															+                    act_layer=act_layer,
														
 
															+                    lr_mult=lr_mult))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        # token * N 
														
 
															+        for i in range(self.block_num):
														
 
															+            x = self.transformer_blocks[i](x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class PyramidPoolAgg(nn.Layer):
														
 
															+    def __init__(self, stride):
														
 
															+        super().__init__()
														
 
															+        self.stride = stride
														
 
															+        self.tmp = Identity()  # avoid the error of paddle.flops
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        '''
														
 
															+        # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
														
 
															+        # so exporting the inference model will raise error.
														
 
															+        _, _, H, W = inputs[-1].shape
														
 
															+        H = (H - 1) // self.stride + 1
														
 
															+        W = (W - 1) // self.stride + 1
														
 
															+        return paddle.concat(
														
 
															+            [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
														
 
															+        '''
														
 
															+        out = []
														
 
															+        ks = 2**len(inputs)
														
 
															+        stride = self.stride**len(inputs)
														
 
															+        for x in inputs:
														
 
															+            x = F.avg_pool2d(x, int(ks), int(stride))
														
 
															+            ks /= 2
														
 
															+            stride /= 2
														
 
															+            out.append(x)
														
 
															+        out = paddle.concat(out, axis=1)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class InjectionMultiSum(nn.Layer):
														
 
															+    def __init__(self, in_channels, out_channels, activations=None,
														
 
															+                 lr_mult=1.0):
														
 
															+        super(InjectionMultiSum, self).__init__()
														
 
															+
														
 
															+        self.local_embedding = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
														
 
															+        self.global_embedding = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
														
 
															+        self.global_act = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
														
 
															+        self.act = HSigmoid()
														
 
															+
														
 
															+    def forward(self, x_low, x_global):
														
 
															+        xl_hw = paddle.shape(x_low)[2:]
														
 
															+        local_feat = self.local_embedding(x_low)
														
 
															+
														
 
															+        global_act = self.global_act(x_global)
														
 
															+        sig_act = F.interpolate(
														
 
															+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
														
 
															+
														
 
															+        global_feat = self.global_embedding(x_global)
														
 
															+        global_feat = F.interpolate(
														
 
															+            global_feat, xl_hw, mode='bilinear', align_corners=False)
														
 
															+
														
 
															+        out = local_feat * sig_act + global_feat
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class InjectionMultiSumCBR(nn.Layer):
														
 
															+    def __init__(self, in_channels, out_channels, activations=None):
														
 
															+        '''
														
 
															+        local_embedding: conv-bn-relu
														
 
															+        global_embedding: conv-bn-relu
														
 
															+        global_act: conv
														
 
															+        '''
														
 
															+        super(InjectionMultiSumCBR, self).__init__()
														
 
															+
														
 
															+        self.local_embedding = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1)
														
 
															+        self.global_embedding = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1)
														
 
															+        self.global_act = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, norm=None, act=None)
														
 
															+        self.act = HSigmoid()
														
 
															+
														
 
															+    def forward(self, x_low, x_global):
														
 
															+        xl_hw = paddle.shape(x)[2:]
														
 
															+        local_feat = self.local_embedding(x_low)
														
 
															+        # kernel
														
 
															+        global_act = self.global_act(x_global)
														
 
															+        global_act = F.interpolate(
														
 
															+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
														
 
															+        # feat_h
														
 
															+        global_feat = self.global_embedding(x_global)
														
 
															+        global_feat = F.interpolate(
														
 
															+            global_feat, xl_hw, mode='bilinear', align_corners=False)
														
 
															+        out = local_feat * global_act + global_feat
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class FuseBlockSum(nn.Layer):
														
 
															+    def __init__(self, in_channels, out_channels, activations=None):
														
 
															+        super(FuseBlockSum, self).__init__()
														
 
															+
														
 
															+        self.fuse1 = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, act=None)
														
 
															+        self.fuse2 = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, act=None)
														
 
															+
														
 
															+    def forward(self, x_low, x_high):
														
 
															+        xl_hw = paddle.shape(x)[2:]
														
 
															+        inp = self.fuse1(x_low)
														
 
															+        kernel = self.fuse2(x_high)
														
 
															+        feat_h = F.interpolate(
														
 
															+            kernel, xl_hw, mode='bilinear', align_corners=False)
														
 
															+        out = inp + feat_h
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class FuseBlockMulti(nn.Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            out_channels,
														
 
															+            stride=1,
														
 
															+            activations=None, ):
														
 
															+        super(FuseBlockMulti, self).__init__()
														
 
															+        assert stride in [1, 2], "The stride should be 1 or 2."
														
 
															+
														
 
															+        self.fuse1 = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, act=None)
														
 
															+        self.fuse2 = ConvBNAct(
														
 
															+            in_channels, out_channels, kernel_size=1, act=None)
														
 
															+        self.act = HSigmoid()
														
 
															+
														
 
															+    def forward(self, x_low, x_high):
														
 
															+        xl_hw = paddle.shape(x)[2:]
														
 
															+        inp = self.fuse1(x_low)
														
 
															+        sig_act = self.fuse2(x_high)
														
 
															+        sig_act = F.interpolate(
														
 
															+            self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
														
 
															+        out = inp * sig_act
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+SIM_BLOCK = {
														
 
															+    "fuse_sum": FuseBlockSum,
														
 
															+    "fuse_multi": FuseBlockMulti,
														
 
															+    "multi_sum": InjectionMultiSum,
														
 
															+    "multi_sum_cbr": InjectionMultiSumCBR,
														
 
															+}
														
 
															+
														
 
															+
														
 
															+class TopTransformer(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 cfgs,
														
 
															+                 injection_out_channels,
														
 
															+                 encoder_out_indices,
														
 
															+                 trans_out_indices=[1, 2, 3],
														
 
															+                 depths=4,
														
 
															+                 key_dim=16,
														
 
															+                 num_heads=8,
														
 
															+                 attn_ratios=2,
														
 
															+                 mlp_ratios=2,
														
 
															+                 c2t_stride=2,
														
 
															+                 drop_path_rate=0.,
														
 
															+                 act_layer=nn.ReLU6,
														
 
															+                 injection_type="muli_sum",
														
 
															+                 injection=True,
														
 
															+                 lr_mult=1.0,
														
 
															+                 in_channels=3,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.feat_channels = [
														
 
															+            c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
														
 
															+        ]
														
 
															+        self.injection_out_channels = injection_out_channels
														
 
															+        self.injection = injection
														
 
															+        self.embed_dim = sum(self.feat_channels)
														
 
															+        self.trans_out_indices = trans_out_indices
														
 
															+
														
 
															+        self.tpm = TokenPyramidModule(
														
 
															+            cfgs=cfgs,
														
 
															+            out_indices=encoder_out_indices,
														
 
															+            in_channels=in_channels,
														
 
															+            lr_mult=lr_mult)
														
 
															+        self.ppa = PyramidPoolAgg(stride=c2t_stride)
														
 
															+
														
 
															+        dpr = [x.item() for x in \
														
 
															+               paddle.linspace(0, drop_path_rate, depths)]
														
 
															+        self.trans = BasicLayer(
														
 
															+            block_num=depths,
														
 
															+            embedding_dim=self.embed_dim,
														
 
															+            key_dim=key_dim,
														
 
															+            num_heads=num_heads,
														
 
															+            mlp_ratios=mlp_ratios,
														
 
															+            attn_ratio=attn_ratios,
														
 
															+            drop=0,
														
 
															+            attn_drop=0,
														
 
															+            drop_path=dpr,
														
 
															+            act_layer=act_layer,
														
 
															+            lr_mult=lr_mult)
														
 
															+
														
 
															+        self.SIM = nn.LayerList()
														
 
															+        inj_module = SIM_BLOCK[injection_type]
														
 
															+        if self.injection:
														
 
															+            for i in range(len(self.feat_channels)):
														
 
															+                if i in trans_out_indices:
														
 
															+                    self.SIM.append(
														
 
															+                        inj_module(
														
 
															+                            self.feat_channels[i],
														
 
															+                            injection_out_channels[i],
														
 
															+                            activations=act_layer,
														
 
															+                            lr_mult=lr_mult))
														
 
															+                else:
														
 
															+                    self.SIM.append(Identity())
														
 
															+
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        ouputs = self.tpm(x)
														
 
															+        out = self.ppa(ouputs)
														
 
															+        out = self.trans(out)
														
 
															+
														
 
															+        if self.injection:
														
 
															+            xx = out.split(self.feat_channels, axis=1)
														
 
															+            results = []
														
 
															+            for i in range(len(self.feat_channels)):
														
 
															+                if i in self.trans_out_indices:
														
 
															+                    local_tokens = ouputs[i]
														
 
															+                    global_semantics = xx[i]
														
 
															+                    out_ = self.SIM[i](local_tokens, global_semantics)
														
 
															+                    results.append(out_)
														
 
															+            return results
														
 
															+        else:
														
 
															+            ouputs.append(out)
														
 
															+            return ouputs
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def TopTransformer_Base(**kwargs):
														
 
															+    cfgs = [
														
 
															+        # k,  t,  c, s
														
 
															+        [3, 1, 16, 1],  # 1/2        
														
 
															+        [3, 4, 32, 2],  # 1/4 1      
														
 
															+        [3, 3, 32, 1],  #            
														
 
															+        [5, 3, 64, 2],  # 1/8 3      
														
 
															+        [5, 3, 64, 1],  #            
														
 
															+        [3, 3, 128, 2],  # 1/16 5     
														
 
															+        [3, 3, 128, 1],  #            
														
 
															+        [5, 6, 160, 2],  # 1/32 7     
														
 
															+        [5, 6, 160, 1],  #            
														
 
															+        [3, 6, 160, 1],  #            
														
 
															+    ]
														
 
															+
														
 
															+    model = TopTransformer(
														
 
															+        cfgs=cfgs,
														
 
															+        injection_out_channels=[None, 256, 256, 256],
														
 
															+        encoder_out_indices=[2, 4, 6, 9],
														
 
															+        trans_out_indices=[1, 2, 3],
														
 
															+        depths=4,
														
 
															+        key_dim=16,
														
 
															+        num_heads=8,
														
 
															+        attn_ratios=2,
														
 
															+        mlp_ratios=2,
														
 
															+        c2t_stride=2,
														
 
															+        drop_path_rate=0.,
														
 
															+        act_layer=nn.ReLU6,
														
 
															+        injection_type="multi_sum",
														
 
															+        injection=True,
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def TopTransformer_Small(**kwargs):
														
 
															+    cfgs = [
														
 
															+        # k,  t,  c, s
														
 
															+        [3, 1, 16, 1],  # 1/2        
														
 
															+        [3, 4, 24, 2],  # 1/4 1      
														
 
															+        [3, 3, 24, 1],  #            
														
 
															+        [5, 3, 48, 2],  # 1/8 3      
														
 
															+        [5, 3, 48, 1],  #            
														
 
															+        [3, 3, 96, 2],  # 1/16 5     
														
 
															+        [3, 3, 96, 1],  #            
														
 
															+        [5, 6, 128, 2],  # 1/32 7     
														
 
															+        [5, 6, 128, 1],  #            
														
 
															+        [3, 6, 128, 1],  #           
														
 
															+    ]
														
 
															+
														
 
															+    model = TopTransformer(
														
 
															+        cfgs=cfgs,
														
 
															+        injection_out_channels=[None, 192, 192, 192],
														
 
															+        encoder_out_indices=[2, 4, 6, 9],
														
 
															+        trans_out_indices=[1, 2, 3],
														
 
															+        depths=4,
														
 
															+        key_dim=16,
														
 
															+        num_heads=6,
														
 
															+        attn_ratios=2,
														
 
															+        mlp_ratios=2,
														
 
															+        c2t_stride=2,
														
 
															+        drop_path_rate=0.,
														
 
															+        act_layer=nn.ReLU6,
														
 
															+        injection_type="multi_sum",
														
 
															+        injection=True,
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+@manager.BACKBONES.add_component
														
 
															+def TopTransformer_Tiny(**kwargs):
														
 
															+    cfgs = [
														
 
															+        # k,  t,  c, s
														
 
															+        [3, 1, 16, 1],  # 1/2       
														
 
															+        [3, 4, 16, 2],  # 1/4 1      
														
 
															+        [3, 3, 16, 1],  #            
														
 
															+        [5, 3, 32, 2],  # 1/8 3      
														
 
															+        [5, 3, 32, 1],  #            
														
 
															+        [3, 3, 64, 2],  # 1/16 5     
														
 
															+        [3, 3, 64, 1],  #            
														
 
															+        [5, 6, 96, 2],  # 1/32 7     
														
 
															+        [5, 6, 96, 1],  #               
														
 
															+    ]
														
 
															+
														
 
															+    model = TopTransformer(
														
 
															+        cfgs=cfgs,
														
 
															+        injection_out_channels=[None, 128, 128, 128],
														
 
															+        encoder_out_indices=[2, 4, 6, 8],
														
 
															+        trans_out_indices=[1, 2, 3],
														
 
															+        depths=4,
														
 
															+        key_dim=16,
														
 
															+        num_heads=4,
														
 
															+        attn_ratios=2,
														
 
															+        mlp_ratios=2,
														
 
															+        c2t_stride=2,
														
 
															+        drop_path_rate=0.,
														
 
															+        act_layer=nn.ReLU6,
														
 
															+        injection_type="multi_sum",
														
 
															+        injection=True,
														
 
															+        **kwargs)
														
 
															+    return model
														
--- a/paddlers/models/ppseg/models/backbones/transformer_utils.py
+++ b/paddlers/models/ppseg/models/backbones/transformer_utils.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
 
															         return x
														
 
															     keep_prob = paddle.to_tensor(1 - drop_prob)
														
 
															     shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
														
 
															-    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
														
 
															+    random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
														
 
															     random_tensor = paddle.floor(random_tensor)  # binarize
														
 
															     output = x.divide(keep_prob) * random_tensor
														
 
															     return output
														
--- a/paddlers/models/ppseg/models/backbones/vision_transformer.py
+++ b/paddlers/models/ppseg/models/backbones/vision_transformer.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
 
															     def __init__(self,
														
 
															                  img_size=224,
														
 
															                  patch_size=16,
														
 
															-                 in_chans=3,
														
 
															+                 in_channels=3,
														
 
															                  embed_dim=768,
														
 
															                  depth=12,
														
 
															                  num_heads=12,
														
@@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
 
															         self.patch_embed = PatchEmbed(
														
 
															             img_size=img_size,
														
 
															             patch_size=patch_size,
														
 
															-            in_chans=in_chans,
														
 
															+            in_chans=in_channels,
														
 
															             embed_dim=embed_dim)
														
 
															         self.pos_w = self.patch_embed.num_patches_in_w
														
 
															         self.pos_h = self.patch_embed.num_patches_in_h
														
--- a/paddlers/models/ppseg/models/backbones/xception_deeplab.py
+++ b/paddlers/models/ppseg/models/backbones/xception_deeplab.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
 
															      Args:
														
 
															          backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
														
 
															+         in_channels (int, optional): The channels of input image. Default: 3.
														
 
															          pretrained (str, optional): The path of pretrained model.
														
 
															          output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
														
 
															     """
														
 
															-    def __init__(self, backbone, pretrained=None, output_stride=16):
														
 
															+    def __init__(self,
														
 
															+                 backbone,
														
 
															+                 in_channels=3,
														
 
															+                 pretrained=None,
														
 
															+                 output_stride=16):
														
 
															         super(XceptionDeeplab, self).__init__()
														
@@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
 
															         self.feat_channels = [128, 2048]
														
 
															         self._conv1 = ConvBNLayer(
														
 
															-            3,
														
 
															+            in_channels,
														
 
															             32,
														
 
															             3,
														
 
															             stride=2,
														
--- a/paddlers/models/ppseg/models/bisenet.py
+++ b/paddlers/models/ppseg/models/bisenet.py
@@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
 
															     Args:
														
 
															         num_classes (int): The unique number of target classes.
														
 
															         lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															     """
														
@@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
 
															                  num_classes,
														
 
															                  lambd=0.25,
														
 
															                  align_corners=False,
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None):
														
 
															         super().__init__()
														
@@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
 
															         sb_channels = (C1, C3, C4, C5)
														
 
															         mid_channels = 128
														
 
															-        self.db = DetailBranch(db_channels)
														
 
															-        self.sb = SemanticBranch(sb_channels)
														
 
															+        self.db = DetailBranch(in_channels, db_channels)
														
 
															+        self.sb = SemanticBranch(in_channels, sb_channels)
														
 
															         self.bga = BGA(mid_channels, align_corners)
														
 
															         self.aux_head1 = SegHead(C1, C1, num_classes)
														
@@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
 
															 class DetailBranch(nn.Layer):
														
 
															     """The detail branch of BiSeNet, which has wide channels but shallow layers."""
														
 
															-    def __init__(self, in_channels):
														
 
															+    def __init__(self, in_channels, feature_channels):
														
 
															         super().__init__()
														
 
															-        C1, C2, C3 = in_channels
														
 
															+        C1, C2, C3 = feature_channels
														
 
															         self.convs = nn.Sequential(
														
 
															             # stage 1
														
 
															             layers.ConvBNReLU(
														
 
															-                3, C1, 3, stride=2),
														
 
															+                in_channels, C1, 3, stride=2),
														
 
															             layers.ConvBNReLU(C1, C1, 3),
														
 
															             # stage 2
														
 
															             layers.ConvBNReLU(
														
@@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
 
															 class SemanticBranch(nn.Layer):
														
 
															     """The semantic branch of BiSeNet, which has narrow channels but deep layers."""
														
 
															-    def __init__(self, in_channels):
														
 
															+    def __init__(self, in_channels, feature_channels):
														
 
															         super().__init__()
														
 
															-        C1, C3, C4, C5 = in_channels
														
 
															+        C1, C3, C4, C5 = feature_channels
														
 
															-        self.stem = StemBlock(3, C1)
														
 
															+        self.stem = StemBlock(in_channels, C1)
														
 
															         self.stage3 = nn.Sequential(
														
 
															             GatherAndExpansionLayer2(C1, C3, 6),
														
--- a/paddlers/models/ppseg/models/ccnet.py
+++ b/paddlers/models/ppseg/models/ccnet.py
@@ -0,0 +1,174 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class CCNet(nn.Layer):
														
 
															+    """
														
 
															+    The CCNet implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to
														
 
															+    Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
														
 
															+    (https://arxiv.org/abs/1811.11721)
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The unique number of target classes.
														
 
															+        backbone (paddle.nn.Layer): Backbone network, currently supports Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
														
 
															+        backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
														
 
															+        enable_auxiliary_loss (bool, optional): A bool value indicating whether to add the auxiliary loss. Default: True.
														
 
															+        dropout_prob (float, optional): The probability of dropout. Default: 0.0.
														
 
															+        recurrence (int, optional): The number of recurrent operations. Default: 1.
														
 
															+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
														
 
															+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices=(2, 3),
														
 
															+                 enable_auxiliary_loss=True,
														
 
															+                 dropout_prob=0.0,
														
 
															+                 recurrence=1,
														
 
															+                 align_corners=False,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.enable_auxiliary_loss = enable_auxiliary_loss
														
 
															+        self.recurrence = recurrence
														
 
															+        self.align_corners = align_corners
														
 
															+
														
 
															+        self.backbone = backbone
														
 
															+        self.backbone_indices = backbone_indices
														
 
															+        backbone_channels = [
														
 
															+            backbone.feat_channels[i] for i in backbone_indices
														
 
															+        ]
														
 
															+
														
 
															+        if enable_auxiliary_loss:
														
 
															+            self.aux_head = layers.AuxLayer(
														
 
															+                backbone_channels[0],
														
 
															+                512,
														
 
															+                num_classes,
														
 
															+                dropout_prob=dropout_prob)
														
 
															+        self.head = RCCAModule(
														
 
															+            backbone_channels[1],
														
 
															+            512,
														
 
															+            num_classes,
														
 
															+            dropout_prob=dropout_prob,
														
 
															+            recurrence=recurrence)
														
 
															+        self.pretrained = pretrained
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        feat_list = self.backbone(x)
														
 
															+        logit_list = []
														
 
															+        output = self.head(feat_list[self.backbone_indices[-1]])
														
 
															+        logit_list.append(output)
														
 
															+        if self.training and self.enable_auxiliary_loss:
														
 
															+            aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
														
 
															+            logit_list.append(aux_out)
														
 
															+        return [
														
 
															+            F.interpolate(
														
 
															+                logit,
														
 
															+                paddle.shape(x)[2:],
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners) for logit in logit_list
														
 
															+        ]
														
 
															+
														
 
															+
														
 
															+class RCCAModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 num_classes,
														
 
															+                 dropout_prob=0.1,
														
 
															+                 recurrence=1):
														
 
															+        super().__init__()
														
 
															+        inter_channels = in_channels // 4
														
 
															+        self.recurrence = recurrence
														
 
															+        self.conva = layers.ConvBNLeakyReLU(
														
 
															+            in_channels, inter_channels, 3, padding=1, bias_attr=False)
														
 
															+        self.cca = CrissCrossAttention(inter_channels)
														
 
															+        self.convb = layers.ConvBNLeakyReLU(
														
 
															+            inter_channels, inter_channels, 3, padding=1, bias_attr=False)
														
 
															+        self.out = layers.AuxLayer(
														
 
															+            in_channels + inter_channels,
														
 
															+            out_channels,
														
 
															+            num_classes,
														
 
															+            dropout_prob=dropout_prob)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        feat = self.conva(x)
														
 
															+        for i in range(self.recurrence):
														
 
															+            feat = self.cca(feat)
														
 
															+        feat = self.convb(feat)
														
 
															+        output = self.out(paddle.concat([x, feat], axis=1))
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class CrissCrossAttention(nn.Layer):
														
 
															+    def __init__(self, in_channels):
														
 
															+        super().__init__()
														
 
															+        self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
														
 
															+        self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
														
 
															+        self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
														
 
															+        self.softmax = nn.Softmax(axis=3)
														
 
															+        self.gamma = self.create_parameter(
														
 
															+            shape=(1, ), default_initializer=nn.initializer.Constant(0))
														
 
															+        self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        b, c, h, w = paddle.shape(x)
														
 
															+        proj_q = self.q_conv(x)
														
 
															+        proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
														
 
															+            [b * w, -1, h]).transpose([0, 2, 1])
														
 
															+        proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
														
 
															+            [b * h, -1, w]).transpose([0, 2, 1])
														
 
															+
														
 
															+        proj_k = self.k_conv(x)
														
 
															+        proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
														
 
															+        proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
														
 
															+
														
 
															+        proj_v = self.v_conv(x)
														
 
															+        proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
														
 
															+        proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
														
 
															+
														
 
															+        energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
														
 
															+            [b, w, h, h]).transpose([0, 2, 1, 3])
														
 
															+        energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
														
 
															+        concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))
														
 
															+
														
 
															+        attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
														
 
															+            [b * w, h, h])
														
 
															+        attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
														
 
															+        out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
														
 
															+            [b, w, -1, h]).transpose([0, 2, 3, 1])
														
 
															+        out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
														
 
															+            [b, h, -1, w]).transpose([0, 2, 1, 3])
														
 
															+        return self.gamma * (out_h + out_w) + x
														
 
															+
														
 
															+    def Inf(self, B, H, W):
														
 
															+        return -paddle.tile(
														
 
															+            paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
														
 
															+            [B * W, 1, 1])
														
--- a/paddlers/models/ppseg/models/ddrnet.py
+++ b/paddlers/models/ppseg/models/ddrnet.py
@@ -0,0 +1,403 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager, param_init
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+
														
 
															+
														
 
															+class DualResNet(nn.Layer):
														
 
															+    """
														
 
															+    The DDRNet implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to
														
 
															+    Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
														
 
															+    (https://arxiv.org/abs/2101.06085)
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): Number of input channels. Default: 3.
														
 
															+        block_layers (list, tuple): The numbers of layers in different blocks. Default: [2, 2, 2, 2].
														
 
															+        planes (int): Base channels in network. Default: 64.
														
 
															+        spp_planes (int): Branch channels for DAPPM. Default: 128.
														
 
															+        head_planes (int): Mid channels of segmentation head. Default: 128.
														
 
															+        enable_auxiliary_loss (bool): Whether use auxiliary head for stage3. Default: False.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 in_channels=3,
														
 
															+                 block_layers=[2, 2, 2, 2],
														
 
															+                 planes=64,
														
 
															+                 spp_planes=128,
														
 
															+                 head_planes=128,
														
 
															+                 enable_auxiliary_loss=False,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        highres_planes = planes * 2
														
 
															+        self.enable_auxiliary_loss = enable_auxiliary_loss
														
 
															+        self.conv1 = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                in_channels, planes, kernel_size=3, stride=2, padding=1),
														
 
															+            layers.ConvBNReLU(
														
 
															+                planes, planes, kernel_size=3, stride=2, padding=1), )
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.layer1 = self._make_layers(BasicBlock, planes, planes,
														
 
															+                                        block_layers[0])
														
 
															+        self.layer2 = self._make_layers(
														
 
															+            BasicBlock, planes, planes * 2, block_layers[1], stride=2)
														
 
															+        self.layer3 = self._make_layers(
														
 
															+            BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
														
 
															+        self.layer4 = self._make_layers(
														
 
															+            BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)
														
 
															+
														
 
															+        self.compression3 = layers.ConvBN(
														
 
															+            planes * 4, highres_planes, kernel_size=1, bias_attr=False)
														
 
															+
														
 
															+        self.compression4 = layers.ConvBN(
														
 
															+            planes * 8, highres_planes, kernel_size=1, bias_attr=False)
														
 
															+
														
 
															+        self.down3 = layers.ConvBN(
														
 
															+            highres_planes,
														
 
															+            planes * 4,
														
 
															+            kernel_size=3,
														
 
															+            stride=2,
														
 
															+            bias_attr=False)
														
 
															+
														
 
															+        self.down4 = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                highres_planes,
														
 
															+                planes * 4,
														
 
															+                kernel_size=3,
														
 
															+                stride=2,
														
 
															+                padding=1,
														
 
															+                bias_attr=False),
														
 
															+            layers.ConvBN(
														
 
															+                planes * 4,
														
 
															+                planes * 8,
														
 
															+                kernel_size=3,
														
 
															+                stride=2,
														
 
															+                padding=1,
														
 
															+                bias_attr=False))
														
 
															+
														
 
															+        self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
														
 
															+                                         2)
														
 
															+        self.layer4_ = self._make_layers(BasicBlock, highres_planes,
														
 
															+                                         highres_planes, 2)
														
 
															+        self.layer5_ = self._make_layers(Bottleneck, highres_planes,
														
 
															+                                         highres_planes, 1)
														
 
															+        self.layer5 = self._make_layers(
														
 
															+            Bottleneck, planes * 8, planes * 8, 1, stride=2)
														
 
															+
														
 
															+        self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
														
 
															+        if self.enable_auxiliary_loss:
														
 
															+            self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
														
 
															+        self.head = DDRNetHead(planes * 4, head_planes, num_classes)
														
 
															+
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+        else:
														
 
															+            for m in self.sublayers():
														
 
															+                if isinstance(m, nn.Conv2D):
														
 
															+                    param_init.kaiming_normal_init(m.weight)
														
 
															+                elif isinstance(m, nn.BatchNorm2D):
														
 
															+                    param_init.constant_init(m.weight, value=1)
														
 
															+                    param_init.constant_init(m.bias, value=0)
														
 
															+
														
 
															+    def _make_layers(self, block, inplanes, planes, blocks, stride=1):
														
 
															+        downsample = None
														
 
															+        if stride != 1 or inplanes != planes * block.expansion:
														
 
															+            downsample = nn.Sequential(
														
 
															+                nn.Conv2D(
														
 
															+                    inplanes,
														
 
															+                    planes * block.expansion,
														
 
															+                    kernel_size=1,
														
 
															+                    stride=stride,
														
 
															+                    bias_attr=False),
														
 
															+                nn.BatchNorm2D(planes * block.expansion), )
														
 
															+        layers = []
														
 
															+        layers.append(block(inplanes, planes, stride, downsample))
														
 
															+        inplanes = planes * block.expansion
														
 
															+        for i in range(1, blocks):
														
 
															+            if i == (blocks - 1):
														
 
															+                layers.append(block(inplanes, planes, stride=1, no_relu=True))
														
 
															+            else:
														
 
															+                layers.append(block(inplanes, planes, stride=1, no_relu=False))
														
 
															+        return nn.Sequential(*layers)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        n, c, h, w = paddle.shape(x)
														
 
															+        width_output = w // 8
														
 
															+        height_output = h // 8
														
 
															+
														
 
															+        x = self.conv1(x)
														
 
															+        stage1_out = self.layer1(x)
														
 
															+        stage2_out = self.layer2(self.relu(stage1_out))
														
 
															+        stage3_out = self.layer3(self.relu(stage2_out))
														
 
															+        stage3_out_dual = self.layer3_(self.relu(stage2_out))
														
 
															+        x = stage3_out + self.down3(self.relu(stage3_out_dual))
														
 
															+        stage3_merge = stage3_out_dual + F.interpolate(
														
 
															+            self.compression3(self.relu(stage3_out)),
														
 
															+            size=[height_output, width_output],
														
 
															+            mode='bilinear')
														
 
															+
														
 
															+        stage4_out = self.layer4(self.relu(x))
														
 
															+        stage4_out_dual = self.layer4_(self.relu(stage3_merge))
														
 
															+
														
 
															+        x = stage4_out + self.down4(self.relu(stage4_out_dual))
														
 
															+        stage4_merge = stage4_out_dual + F.interpolate(
														
 
															+            self.compression4(self.relu(stage4_out)),
														
 
															+            size=[height_output, width_output],
														
 
															+            mode='bilinear')
														
 
															+
														
 
															+        stage5_out_dual = self.layer5_(self.relu(stage4_merge))
														
 
															+        x = F.interpolate(
														
 
															+            self.spp(self.layer5(self.relu(x))),
														
 
															+            size=[height_output, width_output],
														
 
															+            mode='bilinear')
														
 
															+
														
 
															+        output = self.head(x + stage5_out_dual)
														
 
															+        logit_list = []
														
 
															+        logit_list.append(output)
														
 
															+
														
 
															+        if self.enable_auxiliary_loss:
														
 
															+            aux_out = self.aux_head(stage3_merge)
														
 
															+            logit_list.append(aux_out)
														
 
															+        return [
														
 
															+            F.interpolate(
														
 
															+                logit, [h, w], mode='bilinear') for logit in logit_list
														
 
															+        ]
														
 
															+
														
 
															+
														
 
															+class BasicBlock(nn.Layer):
														
 
															+    expansion = 1
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 inplanes,
														
 
															+                 planes,
														
 
															+                 stride=1,
														
 
															+                 downsample=None,
														
 
															+                 no_relu=False):
														
 
															+        super().__init__()
														
 
															+        self.conv_bn_relu = layers.ConvBNReLU(
														
 
															+            inplanes,
														
 
															+            planes,
														
 
															+            kernel_size=3,
														
 
															+            stride=stride,
														
 
															+            padding=1,
														
 
															+            bias_attr=False)
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.conv_bn = layers.ConvBN(
														
 
															+            planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+        self.no_relu = no_relu
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        residual = x
														
 
															+        out = self.conv_bn_relu(x)
														
 
															+        out = self.conv_bn(out)
														
 
															+        if self.downsample is not None:
														
 
															+            residual = self.downsample(x)
														
 
															+        out += residual
														
 
															+        if self.no_relu:
														
 
															+            return out
														
 
															+        else:
														
 
															+            return self.relu(out)
														
 
															+
														
 
															+
														
 
															+class Bottleneck(nn.Layer):
														
 
															+    expansion = 2
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 inplanes,
														
 
															+                 planes,
														
 
															+                 stride=1,
														
 
															+                 downsample=None,
														
 
															+                 no_relu=True):
														
 
															+        super().__init__()
														
 
															+        self.conv_bn_relu1 = layers.ConvBNReLU(
														
 
															+            inplanes, planes, kernel_size=1, bias_attr=False)
														
 
															+        self.conv_bn_relu2 = layers.ConvBNReLU(
														
 
															+            planes,
														
 
															+            planes,
														
 
															+            kernel_size=3,
														
 
															+            stride=stride,
														
 
															+            padding=1,
														
 
															+            bias_attr=False)
														
 
															+        self.conv_bn = layers.ConvBN(
														
 
															+            planes, planes * self.expansion, kernel_size=1, bias_attr=False)
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+        self.no_relu = no_relu
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        residual = x
														
 
															+        out = self.conv_bn_relu1(x)
														
 
															+        out = self.conv_bn_relu2(out)
														
 
															+        out = self.conv_bn(out)
														
 
															+        if self.downsample is not None:
														
 
															+            residual = self.downsample(x)
														
 
															+        out += residual
														
 
															+        if self.no_relu:
														
 
															+            return out
														
 
															+        else:
														
 
															+            return self.relu(out)
														
 
															+
														
 
															+
														
 
															+class DAPPM(nn.Layer):
														
 
															+    def __init__(self, inplanes, branch_planes, outplanes):
														
 
															+        super().__init__()
														
 
															+        self.scale1 = nn.Sequential(
														
 
															+            nn.AvgPool2D(
														
 
															+                kernel_size=5, stride=2, padding=2),
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, branch_planes, kernel_size=1, bias_attr=False), )
														
 
															+        self.scale2 = nn.Sequential(
														
 
															+            nn.AvgPool2D(
														
 
															+                kernel_size=9, stride=4, padding=4),
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, branch_planes, kernel_size=1, bias_attr=False), )
														
 
															+        self.scale3 = nn.Sequential(
														
 
															+            nn.AvgPool2D(
														
 
															+                kernel_size=17, stride=8, padding=8),
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, branch_planes, kernel_size=1, bias_attr=False), )
														
 
															+        self.scale4 = nn.Sequential(
														
 
															+            nn.AdaptiveAvgPool2D((1, 1)),
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, branch_planes, kernel_size=1, bias_attr=False), )
														
 
															+        self.scale0 = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, branch_planes, kernel_size=1, bias_attr=False), )
														
 
															+        self.process1 = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(branch_planes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                branch_planes,
														
 
															+                branch_planes,
														
 
															+                kernel_size=3,
														
 
															+                padding=1,
														
 
															+                bias_attr=False), )
														
 
															+        self.process2 = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(branch_planes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                branch_planes,
														
 
															+                branch_planes,
														
 
															+                kernel_size=3,
														
 
															+                padding=1,
														
 
															+                bias_attr=False), )
														
 
															+        self.process3 = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(branch_planes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                branch_planes,
														
 
															+                branch_planes,
														
 
															+                kernel_size=3,
														
 
															+                padding=1,
														
 
															+                bias_attr=False), )
														
 
															+        self.process4 = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(branch_planes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                branch_planes,
														
 
															+                branch_planes,
														
 
															+                kernel_size=3,
														
 
															+                padding=1,
														
 
															+                bias_attr=False), )
														
 
															+        self.compression = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(branch_planes * 5),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                branch_planes * 5, outplanes, kernel_size=1, bias_attr=False))
														
 
															+        self.shortcut = nn.Sequential(
														
 
															+            layers.SyncBatchNorm(inplanes),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Conv2D(
														
 
															+                inplanes, outplanes, kernel_size=1, bias_attr=False))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        n, c, h, w = paddle.shape(x)
														
 
															+        x0 = self.scale0(x)
														
 
															+        x1 = self.process1(
														
 
															+            F.interpolate(
														
 
															+                self.scale1(x), size=[h, w], mode='bilinear') + x0)
														
 
															+        x2 = self.process2(
														
 
															+            F.interpolate(
														
 
															+                self.scale2(x), size=[h, w], mode='bilinear') + x1)
														
 
															+        x3 = self.process3(
														
 
															+            F.interpolate(
														
 
															+                self.scale3(x), size=[h, w], mode='bilinear') + x2)
														
 
															+        x4 = self.process4(
														
 
															+            F.interpolate(
														
 
															+                self.scale4(x), size=[h, w], mode='bilinear') + x3)
														
 
															+
														
 
															+        out = self.compression(paddle.concat([x0, x1, x2, x3, x4],
														
 
															+                                             1)) + self.shortcut(x)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class DDRNetHead(nn.Layer):
														
 
															+    def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
														
 
															+        super().__init__()
														
 
															+        self.bn1 = nn.BatchNorm2D(inplanes)
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.conv_bn_relu = layers.ConvBNReLU(
														
 
															+            inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
														
 
															+
														
 
															+        self.scale_factor = scale_factor
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.bn1(x)
														
 
															+        x = self.relu(x)
														
 
															+        x = self.conv_bn_relu(x)
														
 
															+        out = self.conv(x)
														
 
															+
														
 
															+        if self.scale_factor is not None:
														
 
															+            out = F.interpolate(
														
 
															+                out, scale_factor=self.scale_factor, mode='bilinear')
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+def DDRNet_23(**kwargs):
														
 
															+    return DualResNet(
														
 
															+        block_layers=[2, 2, 2, 2],
														
 
															+        planes=64,
														
 
															+        spp_planes=128,
														
 
															+        head_planes=128,
														
 
															+        **kwargs)
														
--- a/paddlers/models/ppseg/models/emanet.py
+++ b/paddlers/models/ppseg/models/emanet.py
@@ -209,7 +209,9 @@ class EMAU(nn.Layer):
 
															             mu = F.normalize(mu, axis=1, p=2)
														
 
															             mu = self.mu * (1 - self.momentum) + mu * self.momentum
														
 
															             if paddle.distributed.get_world_size() > 1:
														
 
															-                mu = paddle.distributed.all_reduce(mu)
														
 
															+                out = paddle.distributed.all_reduce(mu)
														
 
															+                if out is not None:
														
 
															+                    mu = out
														
 
															                 mu /= paddle.distributed.get_world_size()
														
 
															             self.mu = mu
														
--- a/paddlers/models/ppseg/models/enet.py
+++ b/paddlers/models/ppseg/models/enet.py
@@ -34,6 +34,7 @@ class ENet(nn.Layer):
 
															     Args:
														
 
															         num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															         encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
														
 
															             function; otherwise, PReLU is used. Default: False.
														
@@ -43,13 +44,14 @@ class ENet(nn.Layer):
 
															     def __init__(self,
														
 
															                  num_classes,
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None,
														
 
															                  encoder_relu=False,
														
 
															                  decoder_relu=True):
														
 
															         super(ENet, self).__init__()
														
 
															         self.numclasses = num_classes
														
 
															-        self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
														
 
															+        self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
														
 
															         self.downsample1_0 = DownsamplingBottleneck(
														
 
															             16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)
														
--- a/paddlers/models/ppseg/models/fast_scnn.py
+++ b/paddlers/models/ppseg/models/fast_scnn.py
@@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
 
															     (https://arxiv.org/pdf/1902.04502.pdf).
														
 
															     Args:
														
 
															         num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
														
 
															             If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
														
 
															         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
														
@@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
 
															     def __init__(self,
														
 
															                  num_classes,
														
 
															+                 in_channels=3,
														
 
															                  enable_auxiliary_loss=True,
														
 
															                  align_corners=False,
														
 
															                  pretrained=None):
														
 
															         super().__init__()
														
 
															-        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
														
 
															+        self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
														
 
															+                                                           64)
														
 
															         self.global_feature_extractor = GlobalFeatureExtractor(
														
 
															             in_channels=64,
														
 
															             block_channels=[64, 96, 128],
														
@@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
 
															         out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
														
 
															     """
														
 
															-    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
														
 
															+    def __init__(self,
														
 
															+                 in_channels=3,
														
 
															+                 dw_channels1=32,
														
 
															+                 dw_channels2=48,
														
 
															+                 out_channels=64):
														
 
															         super(LearningToDownsample, self).__init__()
														
 
															         self.conv_bn_relu = layers.ConvBNReLU(
														
 
															-            in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
														
 
															+            in_channels=in_channels,
														
 
															+            out_channels=dw_channels1,
														
 
															+            kernel_size=3,
														
 
															+            stride=2)
														
 
															         self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
														
 
															             in_channels=dw_channels1,
														
 
															             out_channels=dw_channels2,
														
--- a/paddlers/models/ppseg/models/ginet.py
+++ b/paddlers/models/ppseg/models/ginet.py
@@ -92,7 +92,7 @@ class GINet(nn.Layer):
 
															         return [
														
 
															             F.interpolate(
														
 
															-                logit, (h, w),
														
 
															+                logit, [h, w],
														
 
															                 mode='bilinear',
														
 
															                 align_corners=self.align_corners) for logit in logit_list
														
 
															         ]
														
--- a/paddlers/models/ppseg/models/glore.py
+++ b/paddlers/models/ppseg/models/glore.py
@@ -0,0 +1,198 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class GloRe(nn.Layer):
														
 
															+    """
														
 
															+    The GloRe implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to:
														
 
															+       Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
														
 
															+       (https://arxiv.org/pdf/1811.12814.pdf)
														
 
															+    
														
 
															+    Args:
														
 
															+        num_classes (int): The unique number of target classes.
														
 
															+        backbone (paddle.nn.Layer): Backbone network, currently supports ResNet50/101.
														
 
															+        backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
														
 
															+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
														
 
															+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
														
 
															+        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
														
 
															+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
														
 
															+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
														
 
															+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices=(2, 3),
														
 
															+                 gru_channels=512,
														
 
															+                 gru_num_state=128,
														
 
															+                 gru_num_node=64,
														
 
															+                 enable_auxiliary_loss=True,
														
 
															+                 align_corners=False,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.backbone = backbone
														
 
															+        backbone_channels = [
														
 
															+            backbone.feat_channels[i] for i in backbone_indices
														
 
															+        ]
														
 
															+
														
 
															+        self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
														
 
															+                              gru_channels, gru_num_state, gru_num_node,
														
 
															+                              enable_auxiliary_loss)
														
 
															+        self.align_corners = align_corners
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        feat_list = self.backbone(x)
														
 
															+        logit_list = self.head(feat_list)
														
 
															+        return [
														
 
															+            F.interpolate(
														
 
															+                logit,
														
 
															+                paddle.shape(x)[2:],
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners) for logit in logit_list
														
 
															+        ]
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+
														
 
															+class GloReHead(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone_indices,
														
 
															+                 backbone_channels,
														
 
															+                 gru_channels=512,
														
 
															+                 gru_num_state=128,
														
 
															+                 gru_num_node=64,
														
 
															+                 enable_auxiliary_loss=True):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        in_channels = backbone_channels[1]
														
 
															+        self.conv_bn_relu = layers.ConvBNReLU(
														
 
															+            in_channels, gru_channels, 1, bias_attr=False)
														
 
															+        self.gru_module = GruModule(
														
 
															+            num_input=gru_channels,
														
 
															+            num_state=gru_num_state,
														
 
															+            num_node=gru_num_node)
														
 
															+
														
 
															+        self.dropout = nn.Dropout(0.1)
														
 
															+        self.classifier = nn.Conv2D(512, num_classes, kernel_size=1)
														
 
															+        self.auxlayer = layers.AuxLayer(
														
 
															+            in_channels=backbone_channels[0],
														
 
															+            inter_channels=backbone_channels[0] // 4,
														
 
															+            out_channels=num_classes)
														
 
															+
														
 
															+        self.backbone_indices = backbone_indices
														
 
															+        self.enable_auxiliary_loss = enable_auxiliary_loss
														
 
															+
														
 
															+    def forward(self, feat_list):
														
 
															+
														
 
															+        logit_list = []
														
 
															+        x = feat_list[self.backbone_indices[1]]
														
 
															+
														
 
															+        feature = self.conv_bn_relu(x)
														
 
															+        gru_output = self.gru_module(feature)
														
 
															+        output = self.dropout(gru_output)
														
 
															+        logit = self.classifier(output)
														
 
															+        logit_list.append(logit)
														
 
															+
														
 
															+        if self.enable_auxiliary_loss:
														
 
															+            low_level_feat = feat_list[self.backbone_indices[0]]
														
 
															+            auxiliary_logit = self.auxlayer(low_level_feat)
														
 
															+            logit_list.append(auxiliary_logit)
														
 
															+
														
 
															+        return logit_list
														
 
															+
														
 
															+
														
 
															+class GCN(nn.Layer):
														
 
															+    def __init__(self, num_state, num_node, bias=False):
														
 
															+        super(GCN, self).__init__()
														
 
															+        self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
														
 
															+        self.relu = nn.ReLU()
														
 
															+        self.conv2 = nn.Conv1D(
														
 
															+            num_state, num_state, kernel_size=1, bias_attr=bias)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        h = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
														
 
															+        h = paddle.transpose(h, perm=(0, 2, 1))
														
 
															+        h = h + x
														
 
															+        h = self.relu(self.conv2(h))
														
 
															+        return h
														
 
															+
														
 
															+
														
 
															+class GruModule(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_input=512,
														
 
															+                 num_state=128,
														
 
															+                 num_node=64,
														
 
															+                 normalize=False):
														
 
															+        super(GruModule, self).__init__()
														
 
															+        self.normalize = normalize
														
 
															+        self.num_state = num_state
														
 
															+        self.num_node = num_node
														
 
															+        self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
														
 
															+        self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
														
 
															+        self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
														
 
															+        self.extend_dim = nn.Conv2D(
														
 
															+            self.num_state, num_input, kernel_size=1, bias_attr=False)
														
 
															+        self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        n, c, h, w = input.shape
														
 
															+        # B, C, H, W
														
 
															+        reduction_dim = self.reduction_dim(input)
														
 
															+        # B, N, H, W
														
 
															+        mat_B = self.projection_mat(input)
														
 
															+        # B, C, H*W
														
 
															+        reshaped_reduction = paddle.reshape(
														
 
															+            reduction_dim, shape=[n, self.num_state, h * w])
														
 
															+        # B, N, H*W
														
 
															+        reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w])
														
 
															+        # B, N, H*W
														
 
															+        reproject = reshaped_B
														
 
															+        # B, C, N
														
 
															+        node_state_V = paddle.matmul(
														
 
															+            reshaped_reduction, paddle.transpose(
														
 
															+                reshaped_B, perm=[0, 2, 1]))
														
 
															+
														
 
															+        if self.normalize:
														
 
															+            node_state_V = node_state_V * (1. / reshaped_reduction.shape[2])
														
 
															+
														
 
															+        # B, C, N
														
 
															+        gcn_out = self.gcn(node_state_V)
														
 
															+        # B, C, H*W
														
 
															+        Y = paddle.matmul(gcn_out, reproject)
														
 
															+        # B, C, H, W
														
 
															+        Y = paddle.reshape(Y, shape=[n, self.num_state, h, w])
														
 
															+        Y_extend = self.extend_dim(Y)
														
 
															+        Y_extend = self.extend_bn(Y_extend)
														
 
															+
														
 
															+        out = input + Y_extend
														
 
															+        return out
														
--- a/paddlers/models/ppseg/models/hardnet.py
+++ b/paddlers/models/ppseg/models/hardnet.py
@@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
 
															     Args:
														
 
															         num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
														
 
															         ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
														
 
															         grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
														
@@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
 
															     def __init__(self,
														
 
															                  num_classes,
														
 
															+                 in_channels=3,
														
 
															                  stem_channels=(16, 24, 32, 48),
														
 
															                  ch_list=(64, 96, 160, 224, 320),
														
 
															                  grmul=1.7,
														
@@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
 
															         self.stem = nn.Sequential(
														
 
															             layers.ConvBNReLU(
														
 
															-                3, stem_channels[0], kernel_size=3, bias_attr=False),
														
 
															+                in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
														
 
															             layers.ConvBNReLU(
														
 
															                 stem_channels[0],
														
 
															                 stem_channels[1],
														
--- a/paddlers/models/ppseg/models/layers/__init__.py
+++ b/paddlers/models/ppseg/models/layers/__init__.py
@@ -12,9 +12,10 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															-from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
														
 
															+from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
														
 
															 from .activation import Activation
														
 
															 from .pyramid_pool import ASPPModule, PPModule
														
 
															 from .attention import AttentionBlock
														
 
															 from .nonlocal2d import NonLocal2D
														
 
															 from .wrap_functions import *
														
 
															+from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten
														
--- a/paddlers/models/ppseg/models/layers/attention.py
+++ b/paddlers/models/ppseg/models/layers/attention.py
@@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
 
															         if self.out_project is not None:
														
 
															             context = self.out_project(context)
														
 
															         return context
														
 
															+
														
 
															+
														
 
															+class DualAttentionModule(nn.Layer):
														
 
															+    """
														
 
															+    Dual attention module.
														
 
															+
														
 
															+    Args:
														
 
															+        in_channels (int): The number of input channels.
														
 
															+        out_channels (int): The number of output channels.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, in_channels, out_channels):
														
 
															+        super().__init__()
														
 
															+        inter_channels = in_channels // 4
														
 
															+
														
 
															+        self.channel_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
														
 
															+        self.position_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
														
 
															+        self.pam = PAM(inter_channels)
														
 
															+        self.cam = CAM(inter_channels)
														
 
															+        self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
														
 
															+        self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
														
 
															+        self.conv3 = layers.ConvBNReLU(inter_channels, out_channels, 3)
														
 
															+
														
 
															+    def forward(self, feats):
														
 
															+        channel_feats = self.channel_conv(feats)
														
 
															+        channel_feats = self.cam(channel_feats)
														
 
															+        channel_feats = self.conv1(channel_feats)
														
 
															+
														
 
															+        position_feats = self.position_conv(feats)
														
 
															+        position_feats = self.pam(position_feats)
														
 
															+        position_feats = self.conv2(position_feats)
														
 
															+
														
 
															+        feats_sum = position_feats + channel_feats
														
 
															+        out = self.conv3(feats_sum)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class PAM(nn.Layer):
														
 
															+    """
														
 
															+    Position attention module.
														
 
															+    Args:
														
 
															+        in_channels (int): The number of input channels.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, in_channels):
														
 
															+        super().__init__()
														
 
															+        mid_channels = in_channels // 8
														
 
															+        self.mid_channels = mid_channels
														
 
															+        self.in_channels = in_channels
														
 
															+
														
 
															+        self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
														
 
															+        self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
														
 
															+        self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)
														
 
															+
														
 
															+        self.gamma = self.create_parameter(
														
 
															+            shape=[1],
														
 
															+            dtype='float32',
														
 
															+            default_initializer=nn.initializer.Constant(0))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_shape = paddle.shape(x)
														
 
															+
														
 
															+        # query: n, h * w, c1
														
 
															+        query = self.query_conv(x)
														
 
															+        query = paddle.reshape(query, (0, self.mid_channels, -1))
														
 
															+        query = paddle.transpose(query, (0, 2, 1))
														
 
															+
														
 
															+        # key: n, c1, h * w
														
 
															+        key = self.key_conv(x)
														
 
															+        key = paddle.reshape(key, (0, self.mid_channels, -1))
														
 
															+
														
 
															+        # sim: n, h * w, h * w
														
 
															+        sim = paddle.bmm(query, key)
														
 
															+        sim = F.softmax(sim, axis=-1)
														
 
															+
														
 
															+        value = self.value_conv(x)
														
 
															+        value = paddle.reshape(value, (0, self.in_channels, -1))
														
 
															+        sim = paddle.transpose(sim, (0, 2, 1))
														
 
															+
														
 
															+        # feat: from (n, c2, h * w) -> (n, c2, h, w)
														
 
															+        feat = paddle.bmm(value, sim)
														
 
															+        feat = paddle.reshape(feat,
														
 
															+                              (0, self.in_channels, x_shape[2], x_shape[3]))
														
 
															+
														
 
															+        out = self.gamma * feat + x
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class CAM(nn.Layer):
														
 
															+    """
														
 
															+    Channel attention module.
														
 
															+    Args:
														
 
															+        channels (int): The number of input channels.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, channels):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.channels = channels
														
 
															+        self.gamma = self.create_parameter(
														
 
															+            shape=[1],
														
 
															+            dtype='float32',
														
 
															+            default_initializer=nn.initializer.Constant(0))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_shape = paddle.shape(x)
														
 
															+        # query: n, c, h * w
														
 
															+        query = paddle.reshape(x, (0, self.channels, -1))
														
 
															+        # key: n, h * w, c
														
 
															+        key = paddle.reshape(x, (0, self.channels, -1))
														
 
															+        key = paddle.transpose(key, (0, 2, 1))
														
 
															+
														
 
															+        # sim: n, c, c
														
 
															+        sim = paddle.bmm(query, key)
														
 
															+        # The danet author claims that this can avoid gradient divergence
														
 
															+        sim = paddle.max(sim, axis=-1, keepdim=True).tile(
														
 
															+            [1, 1, self.channels]) - sim
														
 
															+        sim = F.softmax(sim, axis=-1)
														
 
															+
														
 
															+        # feat: from (n, c, h * w) to (n, c, h, w)
														
 
															+        value = paddle.reshape(x, (0, self.channels, -1))
														
 
															+        feat = paddle.bmm(sim, value)
														
 
															+        feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3]))
														
 
															+
														
 
															+        out = self.gamma * feat + x
														
 
															+        return out
														
--- a/paddlers/models/ppseg/models/layers/layer_libs.py
+++ b/paddlers/models/ppseg/models/layers/layer_libs.py
@@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
 
															         return x
														
 
															+class ConvBNAct(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 kernel_size,
														
 
															+                 padding='same',
														
 
															+                 act_type=None,
														
 
															+                 **kwargs):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self._conv = nn.Conv2D(
														
 
															+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
														
 
															+
														
 
															+        if 'data_format' in kwargs:
														
 
															+            data_format = kwargs['data_format']
														
 
															+        else:
														
 
															+            data_format = 'NCHW'
														
 
															+        self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
														
 
															+
														
 
															+        self._act_type = act_type
														
 
															+        if act_type is not None:
														
 
															+            self._act = layers.Activation(act_type)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self._conv(x)
														
 
															+        x = self._batch_norm(x)
														
 
															+        if self._act_type is not None:
														
 
															+            x = self._act(x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															 class ConvBN(nn.Layer):
														
 
															     def __init__(self,
														
 
															                  in_channels,
														
@@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
 
															         x = self._batch_norm(x)
														
 
															         x = self._prelu(x)
														
 
															         return x
														
 
															+
														
 
															+
														
 
															+class ConvBNLeakyReLU(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 out_channels,
														
 
															+                 kernel_size,
														
 
															+                 padding='same',
														
 
															+                 **kwargs):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self._conv = nn.Conv2D(
														
 
															+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
														
 
															+
														
 
															+        if 'data_format' in kwargs:
														
 
															+            data_format = kwargs['data_format']
														
 
															+        else:
														
 
															+            data_format = 'NCHW'
														
 
															+        self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
														
 
															+        self._relu = layers.Activation("leakyrelu")
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self._conv(x)
														
 
															+        x = self._batch_norm(x)
														
 
															+        x = self._relu(x)
														
 
															+        return x
														
--- a/paddlers/models/ppseg/models/layers/tensor_fusion.py
+++ b/paddlers/models/ppseg/models/layers/tensor_fusion.py
@@ -0,0 +1,285 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+from paddle import ParamAttr
														
 
															+from paddle.nn.initializer import Constant
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper
														
 
															+
														
 
															+
														
 
															+class UAFM(nn.Layer):
														
 
															+    """
														
 
															+    The base of Unified Attention Fusion Module.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.conv_x = layers.ConvBNReLU(
														
 
															+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
														
 
															+        self.conv_out = layers.ConvBNReLU(
														
 
															+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
														
 
															+        self.resize_mode = resize_mode
														
 
															+
														
 
															+    def check(self, x, y):
														
 
															+        assert x.ndim == 4 and y.ndim == 4
														
 
															+        x_h, x_w = x.shape[2:]
														
 
															+        y_h, y_w = y.shape[2:]
														
 
															+        assert x_h >= y_h and x_w >= y_w
														
 
															+
														
 
															+    def prepare(self, x, y):
														
 
															+        x = self.prepare_x(x, y)
														
 
															+        y = self.prepare_y(x, y)
														
 
															+        return x, y
														
 
															+
														
 
															+    def prepare_x(self, x, y):
														
 
															+        x = self.conv_x(x)
														
 
															+        return x
														
 
															+
														
 
															+    def prepare_y(self, x, y):
														
 
															+        y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
														
 
															+        return y_up
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        out = x + y
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+    def forward(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        self.check(x, y)
														
 
															+        x, y = self.prepare(x, y)
														
 
															+        out = self.fuse(x, y)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class UAFM_ChAtten(UAFM):
														
 
															+    """
														
 
															+    The UAFM with channel attention, which uses mean and max values.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_xy_atten = nn.Sequential(
														
 
															+            layers.ConvBNAct(
														
 
															+                4 * y_ch,
														
 
															+                y_ch // 2,
														
 
															+                kernel_size=1,
														
 
															+                bias_attr=False,
														
 
															+                act_type="leakyrelu"),
														
 
															+            layers.ConvBN(
														
 
															+                y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        atten = helper.avg_max_reduce_hw([x, y], self.training)
														
 
															+        atten = F.sigmoid(self.conv_xy_atten(atten))
														
 
															+
														
 
															+        out = x * atten + y * (1 - atten)
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class UAFM_ChAtten_S(UAFM):
														
 
															+    """
														
 
															+    The UAFM with channel attention, which uses mean values.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_xy_atten = nn.Sequential(
														
 
															+            layers.ConvBNAct(
														
 
															+                2 * y_ch,
														
 
															+                y_ch // 2,
														
 
															+                kernel_size=1,
														
 
															+                bias_attr=False,
														
 
															+                act_type="leakyrelu"),
														
 
															+            layers.ConvBN(
														
 
															+                y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        atten = helper.avg_reduce_hw([x, y])
														
 
															+        atten = F.sigmoid(self.conv_xy_atten(atten))
														
 
															+
														
 
															+        out = x * atten + y * (1 - atten)
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class UAFM_SpAtten(UAFM):
														
 
															+    """
														
 
															+    The UAFM with spatial attention, which uses mean and max values.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_xy_atten = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                4, 2, kernel_size=3, padding=1, bias_attr=False),
														
 
															+            layers.ConvBN(
														
 
															+                2, 1, kernel_size=3, padding=1, bias_attr=False))
														
 
															+        self._scale = self.create_parameter(
														
 
															+            shape=[1],
														
 
															+            attr=ParamAttr(initializer=Constant(value=1.)),
														
 
															+            dtype="float32")
														
 
															+        self._scale.stop_gradient = True
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        atten = helper.avg_max_reduce_channel([x, y])
														
 
															+        atten = F.sigmoid(self.conv_xy_atten(atten))
														
 
															+
														
 
															+        out = x * atten + y * (self._scale - atten)
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class UAFM_SpAtten_S(UAFM):
														
 
															+    """
														
 
															+    The UAFM with spatial attention, which uses mean values.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_xy_atten = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                2, 2, kernel_size=3, padding=1, bias_attr=False),
														
 
															+            layers.ConvBN(
														
 
															+                2, 1, kernel_size=3, padding=1, bias_attr=False))
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        atten = helper.avg_reduce_channel([x, y])
														
 
															+        atten = F.sigmoid(self.conv_xy_atten(atten))
														
 
															+
														
 
															+        out = x * atten + y * (1 - atten)
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class UAFMMobile(UAFM):
														
 
															+    """
														
 
															+    Unified Attention Fusion Module for mobile.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_x = layers.SeparableConvBNReLU(
														
 
															+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
														
 
															+        self.conv_out = layers.SeparableConvBNReLU(
														
 
															+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
														
 
															+
														
 
															+
														
 
															+class UAFMMobile_SpAtten(UAFM):
														
 
															+    """
														
 
															+    Unified Attention Fusion Module with spatial attention for mobile.
														
 
															+    Args:
														
 
															+        x_ch (int): The channel of x tensor, which is the low level feature.
														
 
															+        y_ch (int): The channel of y tensor, which is the high level feature.
														
 
															+        out_ch (int): The channel of output tensor.
														
 
															+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
														
 
															+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
														
 
															+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
														
 
															+
														
 
															+        self.conv_x = layers.SeparableConvBNReLU(
														
 
															+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
														
 
															+        self.conv_out = layers.SeparableConvBNReLU(
														
 
															+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
														
 
															+
														
 
															+        self.conv_xy_atten = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                4, 2, kernel_size=3, padding=1, bias_attr=False),
														
 
															+            layers.ConvBN(
														
 
															+                2, 1, kernel_size=3, padding=1, bias_attr=False))
														
 
															+
														
 
															+    def fuse(self, x, y):
														
 
															+        """
														
 
															+        Args:
														
 
															+            x (Tensor): The low level feature.
														
 
															+            y (Tensor): The high level feature.
														
 
															+        """
														
 
															+        atten = helper.avg_max_reduce_channel([x, y])
														
 
															+        atten = F.sigmoid(self.conv_xy_atten(atten))
														
 
															+
														
 
															+        out = x * atten + y * (1 - atten)
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
--- a/paddlers/models/ppseg/models/layers/tensor_fusion_helper.py
+++ b/paddlers/models/ppseg/models/layers/tensor_fusion_helper.py
@@ -0,0 +1,133 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+
														
 
															+def avg_reduce_hw(x):
														
 
															+    # Reduce hw by avg
														
 
															+    # Return cat([avg_pool_0, avg_pool_1, ...])
														
 
															+    if not isinstance(x, (list, tuple)):
														
 
															+        return F.adaptive_avg_pool2d(x, 1)
														
 
															+    elif len(x) == 1:
														
 
															+        return F.adaptive_avg_pool2d(x[0], 1)
														
 
															+    else:
														
 
															+        res = []
														
 
															+        for xi in x:
														
 
															+            res.append(F.adaptive_avg_pool2d(xi, 1))
														
 
															+        return paddle.concat(res, axis=1)
														
 
															+
														
 
															+
														
 
															+def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
														
 
															+    assert not isinstance(x, (list, tuple))
														
 
															+    avg_pool = F.adaptive_avg_pool2d(x, 1)
														
 
															+    # TODO(pjc): when axis=[2, 3], the paddle.max api has bug for training.
														
 
															+    if is_training:
														
 
															+        max_pool = F.adaptive_max_pool2d(x, 1)
														
 
															+    else:
														
 
															+        max_pool = paddle.max(x, axis=[2, 3], keepdim=True)
														
 
															+
														
 
															+    if use_concat:
														
 
															+        res = paddle.concat([avg_pool, max_pool], axis=1)
														
 
															+    else:
														
 
															+        res = [avg_pool, max_pool]
														
 
															+    return res
														
 
															+
														
 
															+
														
 
															+def avg_max_reduce_hw(x, is_training):
														
 
															+    # Reduce hw by avg and max
														
 
															+    # Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...])
														
 
															+    if not isinstance(x, (list, tuple)):
														
 
															+        return avg_max_reduce_hw_helper(x, is_training)
														
 
															+    elif len(x) == 1:
														
 
															+        return avg_max_reduce_hw_helper(x[0], is_training)
														
 
															+    else:
														
 
															+        res_avg = []
														
 
															+        res_max = []
														
 
															+        for xi in x:
														
 
															+            avg, max = avg_max_reduce_hw_helper(xi, is_training, False)
														
 
															+            res_avg.append(avg)
														
 
															+            res_max.append(max)
														
 
															+        res = res_avg + res_max
														
 
															+        return paddle.concat(res, axis=1)
														
 
															+
														
 
															+
														
 
															+def avg_reduce_channel(x):
														
 
															+    # Reduce channel by avg
														
 
															+    # Return cat([avg_ch_0, avg_ch_1, ...])
														
 
															+    if not isinstance(x, (list, tuple)):
														
 
															+        return paddle.mean(x, axis=1, keepdim=True)
														
 
															+    elif len(x) == 1:
														
 
															+        return paddle.mean(x[0], axis=1, keepdim=True)
														
 
															+    else:
														
 
															+        res = []
														
 
															+        for xi in x:
														
 
															+            res.append(paddle.mean(xi, axis=1, keepdim=True))
														
 
															+        return paddle.concat(res, axis=1)
														
 
															+
														
 
															+
														
 
															+def max_reduce_channel(x):
														
 
															+    # Reduce channel by max
														
 
															+    # Return cat([max_ch_0, max_ch_1, ...])
														
 
															+    if not isinstance(x, (list, tuple)):
														
 
															+        return paddle.max(x, axis=1, keepdim=True)
														
 
															+    elif len(x) == 1:
														
 
															+        return paddle.max(x[0], axis=1, keepdim=True)
														
 
															+    else:
														
 
															+        res = []
														
 
															+        for xi in x:
														
 
															+            res.append(paddle.max(xi, axis=1, keepdim=True))
														
 
															+        return paddle.concat(res, axis=1)
														
 
															+
														
 
															+
														
 
															+def avg_max_reduce_channel_helper(x, use_concat=True):
														
 
															+    # Reduce hw by avg and max, only support single input
														
 
															+    assert not isinstance(x, (list, tuple))
														
 
															+    mean_value = paddle.mean(x, axis=1, keepdim=True)
														
 
															+    max_value = paddle.max(x, axis=1, keepdim=True)
														
 
															+
														
 
															+    if use_concat:
														
 
															+        res = paddle.concat([mean_value, max_value], axis=1)
														
 
															+    else:
														
 
															+        res = [mean_value, max_value]
														
 
															+    return res
														
 
															+
														
 
															+
														
 
															+def avg_max_reduce_channel(x):
														
 
															+    # Reduce hw by avg and max
														
 
															+    # Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
														
 
															+    if not isinstance(x, (list, tuple)):
														
 
															+        return avg_max_reduce_channel_helper(x)
														
 
															+    elif len(x) == 1:
														
 
															+        return avg_max_reduce_channel_helper(x[0])
														
 
															+    else:
														
 
															+        res = []
														
 
															+        for xi in x:
														
 
															+            res.extend(avg_max_reduce_channel_helper(xi, False))
														
 
															+        return paddle.concat(res, axis=1)
														
 
															+
														
 
															+
														
 
															+def cat_avg_max_reduce_channel(x):
														
 
															+    # Reduce hw by cat+avg+max
														
 
															+    assert isinstance(x, (list, tuple)) and len(x) > 1
														
 
															+
														
 
															+    x = paddle.concat(x, axis=1)
														
 
															+
														
 
															+    mean_value = paddle.mean(x, axis=1, keepdim=True)
														
 
															+    max_value = paddle.max(x, axis=1, keepdim=True)
														
 
															+    res = paddle.concat([mean_value, max_value], axis=1)
														
 
															+
														
 
															+    return res
														
--- a/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py
+++ b/paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py
@@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
 
															                     raise ValueError(
														
 
															                         "if type of `weight` is str, it should equal to 'dynamic', but it is {}"
														
 
															                         .format(self.weight))
														
 
															-            elif isinstance(self.weight, paddle.VarBase):
														
 
															+            elif not isinstance(self.weight, paddle.Tensor):
														
 
															                 raise TypeError(
														
 
															                     'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
														
 
															                     .format(type(self.weight)))
														
--- a/paddlers/models/ppseg/models/losses/cross_entropy_loss.py
+++ b/paddlers/models/ppseg/models/losses/cross_entropy_loss.py
@@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
 
															             logit = paddle.transpose(logit, [0, 2, 3, 1])
														
 
															         label = label.astype('int64')
														
 
															-        # In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
														
 
															-        # When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
														
 
															         loss = F.cross_entropy(
														
 
															             logit,
														
 
															             label,
														
@@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
 
															             loss = loss * semantic_weights
														
 
															         if self.weight is not None:
														
 
															-            _one_hot = F.one_hot(label, logit.shape[-1])
														
 
															+            _one_hot = F.one_hot(label * mask, logit.shape[-1])
														
 
															             coef = paddle.sum(_one_hot * self.weight, axis=-1)
														
 
															         else:
														
 
															             coef = paddle.ones_like(label)
														
--- a/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py
+++ b/paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py
@@ -16,7 +16,7 @@ import numpy as np
 
															 import paddle
														
 
															 from paddle import nn
														
 
															 import paddle.nn.functional as F
														
 
															-from scipy.ndimage.interpolation import shift
														
 
															+from scipy.ndimage import shift
														
 
															 from paddlers.models.ppseg.cvlibs import manager
														
--- a/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py
+++ b/paddlers/models/ppseg/models/losses/detail_aggregate_loss.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/models/losses/dice_loss.py
+++ b/paddlers/models/ppseg/models/losses/dice_loss.py
@@ -19,38 +19,59 @@ from paddlers.models.ppseg.cvlibs import manager
 
															 @manager.LOSSES.add_component
														
 
															 class DiceLoss(nn.Layer):
														
 
															     """
														
 
															-    Implements the dice loss function.
														
 
															+    The implementation of the dice loss.
														
 
															     Args:
														
 
															-        ignore_index (int64): Specifies a target value that is ignored
														
 
															-            and does not contribute to the input gradient. Default ``255``.
														
 
															-        smooth (float32): laplace smoothing,
														
 
															-            to smooth dice loss and accelerate convergence. following:
														
 
															-            https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
														
 
															+        weight (list[float], optional): The weight for each class. Default: None.
														
 
															+        ignore_index (int64, optional): Specifies a target value that
														
 
															+            is ignored and does not contribute to the input gradient. Default ``255``.
														
 
															+        smooth (float32): Laplace smoothing to smooth dice loss and accelerate convergence.
														
 
															+            Default: 1.0
														
 
															     """
														
 
															-    def __init__(self, ignore_index=255, smooth=0.):
														
 
															-        super(DiceLoss, self).__init__()
														
 
															+    def __init__(self, weight=None, ignore_index=255, smooth=1.0):
														
 
															+        super().__init__()
														
 
															+        self.weight = weight
														
 
															         self.ignore_index = ignore_index
														
 
															-        self.eps = 1e-5
														
 
															         self.smooth = smooth
														
 
															+        self.eps = 1e-8
														
 
															     def forward(self, logits, labels):
														
 
															-        labels = paddle.cast(labels, dtype='int32')
														
 
															-        labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
														
 
															-        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
														
 
															-        labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
														
 
															+        num_class = logits.shape[1]
														
 
															+        if self.weight is not None:
														
 
															+            assert num_class == len(self.weight), \
														
 
															+                "The lenght of weight should be euqal to the num class"
														
 
															+
														
 
															+        mask = labels != self.ignore_index
														
 
															+        mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
														
 
															+        labels[labels == self.ignore_index] = 0
														
 
															+        labels_one_hot = F.one_hot(labels, num_class)
														
 
															+        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
														
 
															         logits = F.softmax(logits, axis=1)
														
 
															-        mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
														
 
															-        logits = logits * mask
														
 
															-        labels_one_hot = labels_one_hot * mask
														
 
															+        dice_loss = 0.0
														
 
															+        for i in range(num_class):
														
 
															+            dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
														
 
															+                                           mask, self.smooth, self.eps)
														
 
															+            if self.weight is not None:
														
 
															+                dice_loss_i *= self.weight[i]
														
 
															+            dice_loss += dice_loss_i
														
 
															+        dice_loss = dice_loss / num_class
														
 
															+
														
 
															+        return dice_loss
														
 
															-        dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
														
 
															-        intersection = paddle.sum(logits * labels_one_hot, dims)
														
 
															-        cardinality = paddle.sum(logits + labels_one_hot, dims)
														
 
															-        dice_loss = ((2. * intersection + self.smooth) /
														
 
															-                     (cardinality + self.eps + self.smooth)).mean()
														
 
															-        return 1 - dice_loss
														
 
															+def dice_loss_helper(logit, label, mask, smooth, eps):
														
 
															+    assert logit.shape == label.shape, \
														
 
															+        "The shape of logit and label should be the same"
														
 
															+    logit = paddle.reshape(logit, [0, -1])
														
 
															+    label = paddle.reshape(label, [0, -1])
														
 
															+    mask = paddle.reshape(mask, [0, -1])
														
 
															+    logit *= mask
														
 
															+    label *= mask
														
 
															+    intersection = paddle.sum(logit * label, axis=1)
														
 
															+    cardinality = paddle.sum(logit + label, axis=1)
														
 
															+    dice_loss = 1 - (2 * intersection + smooth) / (cardinality + smooth + eps)
														
 
															+    dice_loss = dice_loss.mean()
														
 
															+    return dice_loss
														
--- a/paddlers/models/ppseg/models/losses/focal_loss.py
+++ b/paddlers/models/ppseg/models/losses/focal_loss.py
@@ -23,38 +23,110 @@ from paddlers.models.ppseg.cvlibs import manager
 
															 @manager.LOSSES.add_component
														
 
															 class FocalLoss(nn.Layer):
														
 
															     """
														
 
															-    Focal Loss.
														
 
															+    The implementation of focal loss.
														
 
															-    Code referenced from:
														
 
															-    https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
														
 
															+    For now, this focal loss requires every label to be 0 or 1.
														
 
															     Args:
														
 
															-        gamma (float): the coefficient of Focal Loss.
														
 
															-        ignore_index (int64): Specifies a target value that is ignored
														
 
															+        alpha (float, list, optional): The alpha of focal loss. alpha is the weight
														
 
															+            of class 1, 1-alpha is the weight of class 0. Default: 0.25
														
 
															+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
														
 
															+        ignore_index (int64, optional): Specifies a target value that is ignored
														
 
															             and does not contribute to the input gradient. Default ``255``.
														
 
															     """
														
 
															-    def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
														
 
															-        super(FocalLoss, self).__init__()
														
 
															+    def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
														
 
															+        super().__init__()
														
 
															+        self.alpha = alpha
														
 
															         self.gamma = gamma
														
 
															         self.ignore_index = ignore_index
														
 
															-        self.edge_label = edge_label
														
 
															+        self.EPS = 1e-10
														
 
															     def forward(self, logit, label):
														
 
															-        logit = paddle.reshape(
														
 
															-            logit, [logit.shape[0], logit.shape[1], -1])  # N,C,H,W => N,C,H*W
														
 
															-        logit = paddle.transpose(logit, [0, 2, 1])  # N,C,H*W => N,H*W,C
														
 
															-        logit = paddle.reshape(logit,
														
 
															-                               [-1, logit.shape[2]])  # N,H*W,C => N*H*W,C
														
 
															-        label = paddle.reshape(label, [-1, 1])
														
 
															-        range_ = paddle.arange(0, label.shape[0])
														
 
															-        range_ = paddle.unsqueeze(range_, axis=-1)
														
 
															-        label = paddle.cast(label, dtype='int64')
														
 
															-        label = paddle.concat([range_, label], axis=-1)
														
 
															-        logpt = F.log_softmax(logit)
														
 
															-        logpt = paddle.gather_nd(logpt, label)
														
 
															-
														
 
															-        pt = paddle.exp(logpt.detach())
														
 
															-        loss = -1 * (1 - pt)**self.gamma * logpt
														
 
															-        loss = paddle.mean(loss)
														
 
															-        return loss
														
 
															+        """
														
 
															+        Forward computation.
														
 
															+
														
 
															+        Args:
														
 
															+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
														
 
															+                (N, C, H, W), where C is number of classes.
														
 
															+            label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
														
 
															+                where each value is 0 <= label[i] <= C-1.
														
 
															+        Returns:
														
 
															+            (Tensor): The average loss.
														
 
															+        """
														
 
															+        assert logit.ndim == 4, "The ndim of logit should be 4."
														
 
															+        assert logit.shape[1] == 2, "The channel of logit should be 2."
														
 
															+        assert label.ndim == 3, "The ndim of label should be 3."
														
 
															+
														
 
															+        class_num = logit.shape[1]  # class num is 2
														
 
															+        logit = paddle.transpose(logit, [0, 2, 3, 1])  # N,C,H,W => N,H,W,C
														
 
															+
														
 
															+        mask = label != self.ignore_index  # N,H,W
														
 
															+        mask = paddle.unsqueeze(mask, 3)
														
 
															+        mask = paddle.cast(mask, 'float32')
														
 
															+        mask.stop_gradient = True
														
 
															+
														
 
															+        label = F.one_hot(label, class_num)  # N,H,W,C
														
 
															+        label = paddle.cast(label, logit.dtype)
														
 
															+        label.stop_gradient = True
														
 
															+
														
 
															+        loss = F.sigmoid_focal_loss(
														
 
															+            logit=logit,
														
 
															+            label=label,
														
 
															+            alpha=self.alpha,
														
 
															+            gamma=self.gamma,
														
 
															+            reduction='none')
														
 
															+        loss = loss * mask
														
 
															+        avg_loss = paddle.sum(loss) / (
														
 
															+            paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
														
 
															+        return avg_loss
														
 
															+
														
 
															+
														
 
															+@manager.LOSSES.add_component
														
 
															+class MultiClassFocalLoss(nn.Layer):
														
 
															+    """
														
 
															+    The implementation of focal loss for multiple classes.
														
 
															+
														
 
															+    Args:
														
 
															+        alpha (float, optional): The alpha of focal loss. It is a scalar factor that
														
 
															+            multiplies the loss of every class. Default: 1.0
														
 
															+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
														
 
															+        ignore_index (int64, optional): Specifies a target value that is ignored
														
 
															+            and does not contribute to the input gradient. Default ``255``.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
														
 
															+        super().__init__()
														
 
															+        self.num_class = num_class
														
 
															+        self.alpha = alpha
														
 
															+        self.gamma = gamma
														
 
															+        self.ignore_index = ignore_index
														
 
															+        self.EPS = 1e-10
														
 
															+
														
 
															+    def forward(self, logit, label):
														
 
															+        """
														
 
															+        Forward computation.
														
 
															+
														
 
															+        Args:
														
 
															+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
														
 
															+                (N, C, H, W), where C is number of classes.
														
 
															+            label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
														
 
															+                where each value is 0 <= label[i] <= C-1.
														
 
															+        Returns:
														
 
															+            (Tensor): The average loss.
														
 
															+        """
														
 
															+        assert logit.ndim == 4, "The ndim of logit should be 4."
														
 
															+        assert label.ndim == 3, "The ndim of label should be 3."
														
 
															+
														
 
															+        logit = paddle.transpose(logit, [0, 2, 3, 1])
														
 
															+        label = label.astype('int64')
														
 
															+        ce_loss = F.cross_entropy(
														
 
															+            logit, label, ignore_index=self.ignore_index, reduction='none')
														
 
															+
														
 
															+        pt = paddle.exp(-ce_loss)
														
 
															+        focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss
														
 
															+
														
 
															+        mask = paddle.cast(label != self.ignore_index, 'float32')
														
 
															+        focal_loss *= mask
														
 
															+        avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
														
 
															+        return avg_loss
														
--- a/paddlers/models/ppseg/models/losses/l1_loss.py
+++ b/paddlers/models/ppseg/models/losses/l1_loss.py
@@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
 
															     def __init__(self, reduction='mean', ignore_index=255):
														
 
															         super().__init__(reduction=reduction)
														
 
															+        self.ignore_index = ignore_index
														
 
															+        self.EPS = 1e-10
														
 
															+
														
 
															+    def forward(self, input, label):
														
 
															+        mask = label != self.ignore_index
														
 
															+        mask = paddle.cast(mask, "float32")
														
 
															+        label.stop_gradient = True
														
 
															+        mask.stop_gradient = True
														
 
															+
														
 
															+        output = paddle.nn.functional.l1_loss(
														
 
															+            input, label, "none", name=self.name) * mask
														
 
															+
														
 
															+        if self.reduction == "mean":
														
 
															+            return paddle.mean(output) / (paddle.mean(mask) + self.EPS)
														
 
															+        elif self.reduction == "none":
														
 
															+            return output
														
 
															+        elif self.reduction == "sum":
														
 
															+            return paddle.sum(output)
														
 
															+        else:
														
 
															+            raise ValueError(
														
 
															+                "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
														
 
															+                "received %s, which is not allowed." % self.reduction)
														
--- a/paddlers/models/ppseg/models/losses/lovasz_loss.py
+++ b/paddlers/models/ppseg/models/losses/lovasz_loss.py
@@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
 
															     signs = 2. * labels - 1.
														
 
															     signs.stop_gradient = True
														
 
															     errors = 1. - logits * signs
														
 
															-    errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
														
 
															-                                                        'descending', True)
														
 
															+    if hasattr(paddle, "_legacy_C_ops"):
														
 
															+        errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
														
 
															+                                                           'descending', True)
														
 
															+    else:
														
 
															+        errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
														
 
															+                                                    'descending', True)
														
 
															     errors_sorted.stop_gradient = False
														
 
															     gt_sorted = paddle.gather(labels, perm)
														
 
															     grad = lovasz_grad(gt_sorted)
														
@@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
 
															         else:
														
 
															             class_pred = probas[:, c]
														
 
															         errors = paddle.abs(fg - class_pred)
														
 
															-        errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
														
 
															-                                                            'descending', True)
														
 
															+        if hasattr(paddle, "_legacy_C_ops"):
														
 
															+            errors_sorted, perm = paddle._legacy_C_ops.argsort(
														
 
															+                errors, 'axis', 0, 'descending', True)
														
 
															+        else:
														
 
															+            errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
														
 
															+                                                        'descending', True)
														
 
															         errors_sorted.stop_gradient = False
														
 
															         fg_sorted = paddle.gather(fg, perm)
														
--- a/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py
+++ b/paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py
@@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
 
															         # get the label after ohem
														
 
															         n, c, h, w = logit.shape
														
 
															-        label = label.reshape((-1, ))
														
 
															+        label = label.reshape((-1, )).astype('int64')
														
 
															         valid_mask = (label != self.ignore_index).astype('int64')
														
 
															         num_valid = valid_mask.sum()
														
 
															         label = label * valid_mask
														
--- a/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py
+++ b/paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py
@@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
 
															                 elif num_hard >= n_view / 2:
														
 
															                     num_easy_keep = num_easy
														
 
															                     num_hard_keep = n_view - num_easy_keep
														
 
															-                else:
														
 
															+                elif num_easy >= n_view / 2:
														
 
															                     num_hard_keep = num_hard
														
 
															                     num_easy_keep = n_view - num_hard_keep
														
 
															+                else:
														
 
															+                    num_hard_keep = num_hard
														
 
															+                    num_easy_keep = num_easy
														
 
															                 indices = None
														
 
															                 if num_hard > 0:
														
--- a/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py
+++ b/paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py
@@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
 
															                 label_num_conn, label_conn = cv2.connectedComponents(
														
 
															                     labels_np_class.astype(np.uint8))
														
 
															+                origin_pred_num_conn = pred_num_conn
														
 
															                 if pred_num_conn > 2 * label_num_conn:
														
 
															                     pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
														
 
															                 real_pred_num = pred_num_conn - 1
														
@@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
 
															                 # Connected Components Matching and SC Loss Calculation
														
 
															                 if real_label_num > 0 and real_pred_num > 0:
														
 
															                     img_connectivity = compute_class_connectiveity(
														
 
															-                        pred_conn, label_conn, pred_num_conn, label_num_conn,
														
 
															-                        pred_i, real_label_num, real_pred_num, zero)
														
 
															+                        pred_conn, label_conn, pred_num_conn,
														
 
															+                        origin_pred_num_conn, label_num_conn, pred_i,
														
 
															+                        real_label_num, real_pred_num, zero)
														
 
															                     sc_loss += 1 - img_connectivity
														
 
															                 elif real_label_num == 0 and real_pred_num == 0:
														
 
															                     # if no connected component, SC Loss = 0, so pass
														
@@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
 
															 def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
														
 
															-                                label_num_conn, pred, real_label_num,
														
 
															-                                real_pred_num, zero):
														
 
															+                                origin_pred_num_conn, label_num_conn, pred,
														
 
															+                                real_label_num, real_pred_num, zero):
														
 
															     pred_conn = paddle.to_tensor(pred_conn)
														
 
															     label_conn = paddle.to_tensor(label_conn)
														
 
															-    pred_conn = F.one_hot(pred_conn, pred_num_conn)
														
 
															+    pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
														
 
															     label_conn = F.one_hot(label_conn, label_num_conn)
														
 
															     ious = paddle.zeros((real_label_num, real_pred_num))
														
--- a/paddlers/models/ppseg/models/lraspp.py
+++ b/paddlers/models/ppseg/models/lraspp.py
@@ -0,0 +1,162 @@
 
															+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+from functools import partial
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg import utils
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class LRASPP(nn.Layer):
														
 
															+    """
														
 
															+    Semantic segmentation model with a light R-ASPP head.
														
 
															+    
														
 
															+    The original article refers to
														
 
															+        Howard, Andrew, et al. "Searching for mobilenetv3."
														
 
															+        (https://arxiv.org/pdf/1905.02244.pdf)
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The number of target classes.
														
 
															+        backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
														
 
															+            have feat_channels, of which the length is 5.
														
 
															+        backbone_indices (List(int), optional): The values indicate the indices of backbone output 
														
 
															+            used as the input of the LR-ASPP head.
														
 
															+            Default: [0, 1, 3].
														
 
															+        lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
														
 
															+            Default: [32, 64].
														
 
															+        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
														
 
															+            Default: 128
														
 
															+        resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
														
 
															+            Default: bilinear.
														
 
															+        use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
														
 
															+            a 49x49 kernel for average pooling.
														
 
															+            Default: True.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices=[0, 1, 3],
														
 
															+                 lraspp_head_inter_chs=[32, 64],
														
 
															+                 lraspp_head_out_ch=128,
														
 
															+                 resize_mode='bilinear',
														
 
															+                 use_gap=True,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        # backbone
														
 
															+        assert hasattr(backbone, 'feat_channels'), \
														
 
															+            "The backbone should has feat_channels."
														
 
															+        assert len(backbone.feat_channels) >= len(backbone_indices), \
														
 
															+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \
														
 
															+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        assert len(backbone.feat_channels) > max(backbone_indices), \
														
 
															+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
														
 
															+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        self.backbone = backbone
														
 
															+
														
 
															+        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
														
 
															+            "should not be lesser than 1"
														
 
															+
														
 
															+        # head
														
 
															+        assert len(backbone_indices) == len(
														
 
															+            lraspp_head_inter_chs
														
 
															+        ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs."
														
 
															+        self.backbone_indices = backbone_indices
														
 
															+
														
 
															+        self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
														
 
															+                                      lraspp_head_inter_chs, lraspp_head_out_ch,
														
 
															+                                      num_classes, resize_mode, use_gap)
														
 
															+
														
 
															+        # pretrained
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_hw = paddle.shape(x)[2:]
														
 
															+
														
 
															+        feats_backbone = self.backbone(x)
														
 
															+        assert len(feats_backbone) >= len(self.backbone_indices), \
														
 
															+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
														
 
															+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
														
 
															+
														
 
															+        y = self.lraspp_head(feats_backbone)
														
 
															+        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
														
 
															+        logit_list = [y]
														
 
															+
														
 
															+        return logit_list
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+
														
 
															+class LRASPPHead(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 indices,
														
 
															+                 in_chs,
														
 
															+                 mid_chs,
														
 
															+                 out_ch,
														
 
															+                 n_classes,
														
 
															+                 resize_mode,
														
 
															+                 use_gap,
														
 
															+                 align_corners=False):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.indices = indices[-2::-1]
														
 
															+        self.in_chs = [in_chs[i] for i in indices[::-1]]
														
 
															+        self.mid_chs = mid_chs[::-1]
														
 
															+        self.convs = nn.LayerList()
														
 
															+        self.conv_ups = nn.LayerList()
														
 
															+        for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
														
 
															+            self.convs.append(
														
 
															+                nn.Conv2D(
														
 
															+                    in_ch, mid_ch, kernel_size=1, bias_attr=False))
														
 
															+            self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))
														
 
															+        self.conv_w = nn.Sequential(
														
 
															+            nn.AvgPool2D(
														
 
															+                kernel_size=(49, 49), stride=(16, 20))
														
 
															+            if not use_gap else nn.AdaptiveAvgPool2D(1),
														
 
															+            nn.Conv2D(
														
 
															+                self.in_chs[0], out_ch, 1, bias_attr=False),
														
 
															+            nn.Sigmoid())
														
 
															+        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
														
 
															+        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
														
 
															+        self.conv_out = nn.Conv2D(
														
 
															+            out_ch, n_classes, kernel_size=1, bias_attr=False)
														
 
															+
														
 
															+        self.interp = partial(
														
 
															+            F.interpolate, mode=resize_mode, align_corners=align_corners)
														
 
															+
														
 
															+    def forward(self, in_feat_list):
														
 
															+        x = in_feat_list[-1]
														
 
															+
														
 
															+        x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
														
 
															+        y = self.conv_t(x)
														
 
															+
														
 
															+        for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
														
 
															+            feat = in_feat_list[idx]
														
 
															+            y = self.interp(y, paddle.shape(feat)[2:])
														
 
															+            y = paddle.concat([y, conv(feat)], axis=1)
														
 
															+            y = conv_up(y)
														
 
															+
														
 
															+        y = self.conv_out(y)
														
 
															+        return y
														
--- a/paddlers/models/ppseg/models/mla_transformer.py
+++ b/paddlers/models/ppseg/models/mla_transformer.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/models/mobileseg.py
+++ b/paddlers/models/ppseg/models/mobileseg.py
@@ -0,0 +1,289 @@
 
															+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg import utils
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class MobileSeg(nn.Layer):
														
 
															+    """
														
 
															+    The semantic segmentation models for mobile devices.
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The number of target classes.
														
 
															+        backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
														
 
															+            has feat_channels, of which the length is 5.
														
 
															+        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
														
 
															+            Default: [2, 3, 4].
														
 
															+        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4].
														
 
															+        cm_out_ch (int, optional): The output channel of the last context module. Default: 128.
														
 
															+        arm_type (str, optional): The type of attention refinement module. Default: ARM_Add_SpAttenAdd3.
														
 
															+        arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128].
														
 
															+        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
														
 
															+            Default: [64, 64, 64].
														
 
															+        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
														
 
															+            Default: bilinear.
														
 
															+        use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices=[1, 2, 3],
														
 
															+                 cm_bin_sizes=[1, 2],
														
 
															+                 cm_out_ch=64,
														
 
															+                 arm_type='UAFMMobile',
														
 
															+                 arm_out_chs=[32, 48, 64],
														
 
															+                 seg_head_inter_chs=[32, 32, 32],
														
 
															+                 resize_mode='bilinear',
														
 
															+                 use_last_fuse=False,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        # backbone
														
 
															+        assert hasattr(backbone, 'feat_channels'), \
														
 
															+            "The backbone should has feat_channels."
														
 
															+        assert len(backbone.feat_channels) >= len(backbone_indices), \
														
 
															+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \
														
 
															+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        assert len(backbone.feat_channels) > max(backbone_indices), \
														
 
															+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
														
 
															+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        self.backbone = backbone
														
 
															+
														
 
															+        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
														
 
															+            "should not be lesser than 1"
														
 
															+        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
														
 
															+        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
														
 
															+
														
 
															+        # head
														
 
															+        if len(arm_out_chs) == 1:
														
 
															+            arm_out_chs = arm_out_chs * len(backbone_indices)
														
 
															+        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
														
 
															+            "arm_out_chs and backbone_indices should be equal"
														
 
															+
														
 
															+        self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
														
 
															+                                        cm_bin_sizes, cm_out_ch, arm_type,
														
 
															+                                        resize_mode, use_last_fuse)
														
 
															+
														
 
															+        if len(seg_head_inter_chs) == 1:
														
 
															+            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
														
 
															+        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
														
 
															+            "seg_head_inter_chs and backbone_indices should be equal"
														
 
															+        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
														
 
															+        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
														
 
															+            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
														
 
															+
														
 
															+        # pretrained
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_hw = paddle.shape(x)[2:]
														
 
															+
														
 
															+        feats_backbone = self.backbone(x)  # [x4, x8, x16, x32]
														
 
															+        assert len(feats_backbone) >= len(self.backbone_indices), \
														
 
															+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
														
 
															+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
														
 
															+
														
 
															+        feats_selected = [feats_backbone[i] for i in self.backbone_indices]
														
 
															+        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]
														
 
															+
														
 
															+        if self.training:
														
 
															+            logit_list = []
														
 
															+            for x, seg_head in zip(feats_head, self.seg_heads):
														
 
															+                x = seg_head(x)
														
 
															+                logit_list.append(x)
														
 
															+            logit_list = [
														
 
															+                F.interpolate(
														
 
															+                    x, x_hw, mode='bilinear', align_corners=False)
														
 
															+                for x in logit_list
														
 
															+            ]
														
 
															+        else:
														
 
															+            x = self.seg_heads[0](feats_head[0])
														
 
															+            x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
														
 
															+            logit_list = [x]
														
 
															+
														
 
															+        return logit_list
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+
														
 
															+class MobileSegHead(nn.Layer):
														
 
															+    """
														
 
															+    The head of MobileSeg.
														
 
															+
														
 
															+    Args:
														
 
															+        backbone_out_chs (List(Tensor)): The channels of output tensors in the backbone.
														
 
															+        arm_out_chs (List(int)): The out channels of each arm module.
														
 
															+        cm_bin_sizes (List(int)): The bin size of context module.
														
 
															+        cm_out_ch (int): The output channel of the last context module.
														
 
															+        arm_type (str): The type of attention refinement module.
														
 
															+        resize_mode (str): The resize mode for the upsampling operation in decoder.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
														
 
															+                 arm_type, resize_mode, use_last_fuse):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
														
 
															+                                      cm_out_ch, cm_bin_sizes)
														
 
															+
														
 
															+        assert hasattr(layers,arm_type), \
														
 
															+            "Not support arm_type ({})".format(arm_type)
														
 
															+        arm_class = eval("layers." + arm_type)
														
 
															+
														
 
															+        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
														
 
															+        for i in range(len(backbone_out_chs)):
														
 
															+            low_chs = backbone_out_chs[i]
														
 
															+            high_ch = cm_out_ch if i == len(
														
 
															+                backbone_out_chs) - 1 else arm_out_chs[i + 1]
														
 
															+            out_ch = arm_out_chs[i]
														
 
															+            arm = arm_class(
														
 
															+                low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
														
 
															+            self.arm_list.append(arm)
														
 
															+
														
 
															+        self.use_last_fuse = use_last_fuse
														
 
															+        if self.use_last_fuse:
														
 
															+            self.fuse_convs = nn.LayerList()
														
 
															+            for i in range(1, len(arm_out_chs)):
														
 
															+                conv = layers.SeparableConvBNReLU(
														
 
															+                    arm_out_chs[i],
														
 
															+                    arm_out_chs[0],
														
 
															+                    kernel_size=3,
														
 
															+                    bias_attr=False)
														
 
															+                self.fuse_convs.append(conv)
														
 
															+            self.last_conv = layers.SeparableConvBNReLU(
														
 
															+                len(arm_out_chs) * arm_out_chs[0],
														
 
															+                arm_out_chs[0],
														
 
															+                kernel_size=3,
														
 
															+                bias_attr=False)
														
 
															+
														
 
															+    def forward(self, in_feat_list):
														
 
															+        """
														
 
															+        Args:
														
 
															+            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
														
 
															+                x2, x4 and x8 are optional.
														
 
															+        Returns:
														
 
															+            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
														
 
															+                x2, x4 and x8 are optional.
														
 
															+                The length of in_feat_list and out_feat_list are the same.
														
 
															+        """
														
 
															+
														
 
															+        high_feat = self.cm(in_feat_list[-1])
														
 
															+        out_feat_list = []
														
 
															+
														
 
															+        for i in reversed(range(len(in_feat_list))):
														
 
															+            low_feat = in_feat_list[i]
														
 
															+            arm = self.arm_list[i]
														
 
															+            high_feat = arm(low_feat, high_feat)
														
 
															+            out_feat_list.insert(0, high_feat)
														
 
															+
														
 
															+        if self.use_last_fuse:
														
 
															+            x_list = [out_feat_list[0]]
														
 
															+            size = paddle.shape(out_feat_list[0])[2:]
														
 
															+            for i, (x, conv
														
 
															+                    ) in enumerate(zip(out_feat_list[1:], self.fuse_convs)):
														
 
															+                x = conv(x)
														
 
															+                x = F.interpolate(
														
 
															+                    x, size=size, mode='bilinear', align_corners=False)
														
 
															+                x_list.append(x)
														
 
															+            x = paddle.concat(x_list, axis=1)
														
 
															+            x = self.last_conv(x)
														
 
															+            out_feat_list[0] = x
														
 
															+
														
 
															+        return out_feat_list
														
 
															+
														
 
															+
														
 
															+class MobileContextModule(nn.Layer):
														
 
															+    """
														
 
															+    Context Module for Mobile Model.
														
 
															+
														
 
															+    Args:
														
 
															+        in_channels (int): The number of input channels to pyramid pooling module.
														
 
															+        inter_channels (int): The number of inter channels to pyramid pooling module.
														
 
															+        out_channels (int): The number of output channels after pyramid pooling module.
														
 
															+        bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 3).
														
 
															+        align_corners (bool): An argument of F.interpolate. It should be set to False
														
 
															+            when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 inter_channels,
														
 
															+                 out_channels,
														
 
															+                 bin_sizes,
														
 
															+                 align_corners=False):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.stages = nn.LayerList([
														
 
															+            self._make_stage(in_channels, inter_channels, size)
														
 
															+            for size in bin_sizes
														
 
															+        ])
														
 
															+
														
 
															+        self.conv_out = layers.SeparableConvBNReLU(
														
 
															+            in_channels=inter_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=3,
														
 
															+            bias_attr=False)
														
 
															+
														
 
															+        self.align_corners = align_corners
														
 
															+
														
 
															+    def _make_stage(self, in_channels, out_channels, size):
														
 
															+        prior = nn.AdaptiveAvgPool2D(output_size=size)
														
 
															+        conv = layers.ConvBNReLU(
														
 
															+            in_channels=in_channels, out_channels=out_channels, kernel_size=1)
														
 
															+        return nn.Sequential(prior, conv)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        out = None
														
 
															+        input_shape = paddle.shape(input)[2:]
														
 
															+
														
 
															+        for stage in self.stages:
														
 
															+            x = stage(input)
														
 
															+            x = F.interpolate(
														
 
															+                x,
														
 
															+                input_shape,
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners)
														
 
															+            if out is None:
														
 
															+                out = x
														
 
															+            else:
														
 
															+                out += x
														
 
															+
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class SegHead(nn.Layer):
														
 
															+    def __init__(self, in_chan, mid_chan, n_classes):
														
 
															+        super().__init__()
														
 
															+        self.conv = layers.SeparableConvBNReLU(
														
 
															+            in_chan, mid_chan, kernel_size=3, bias_attr=False)
														
 
															+        self.conv_out = nn.Conv2D(
														
 
															+            mid_chan, n_classes, kernel_size=1, bias_attr=False)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.conv(x)
														
 
															+        x = self.conv_out(x)
														
 
															+        return x
														
--- a/paddlers/models/ppseg/models/pointrend.py
+++ b/paddlers/models/ppseg/models/pointrend.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/models/portraitnet.py
+++ b/paddlers/models/ppseg/models/portraitnet.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -50,19 +50,9 @@ class PortraitNet(nn.Layer):
 
															         self.init_weight()
														
 
															     def forward(self, x):
														
 
															-        img = x[:, :3, :, :]
														
 
															-        img_ori = x[:, 3:, :, :]
														
 
															-
														
 
															-        feat_list = self.backbone(img)
														
 
															+        feat_list = self.backbone(x)
														
 
															         logits_list = self.head(feat_list)
														
 
															-
														
 
															-        feat_list = self.backbone(img_ori)
														
 
															-        logits_ori_list = self.head(feat_list)
														
 
															-
														
 
															-        return [
														
 
															-            logits_list[0], logits_ori_list[0], logits_list[1],
														
 
															-            logits_ori_list[1]
														
 
															-        ]
														
 
															+        return [logits_list]
														
 
															     def init_weight(self):
														
 
															         if self.pretrained is not None:
														
--- a/paddlers/models/ppseg/models/pp_liteseg.py
+++ b/paddlers/models/ppseg/models/pp_liteseg.py
@@ -0,0 +1,273 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg import utils
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class PPLiteSeg(nn.Layer):
														
 
															+    """
														
 
															+    The PP_LiteSeg implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu,
														
 
															+    Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang, Baohua Lai,
														
 
															+    Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic
														
 
															+    Segmentation Model. https://arxiv.org/abs/2204.02681".
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The number of target classes.
														
 
															+        backbone (nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
														
 
															+            have feat_channels, of which the length is 5.
														
 
															+        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
														
 
															+            Default: [2, 3, 4].
														
 
															+        arm_type (str, optional): The type of attention refinement module. Default: UAFM_SpAtten.
														
 
															+        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4].
														
 
															+        cm_out_ch (int, optional): The output channel of the last context module. Default: 128.
														
 
															+        arm_out_chs (List(int), optional): The out channels of each arm module. Default: [64, 96, 128].
														
 
															+        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
														
 
															+            Default: [64, 64, 64].
														
 
															+        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
														
 
															+            Default: bilinear.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices=[2, 3, 4],
														
 
															+                 arm_type='UAFM_SpAtten',
														
 
															+                 cm_bin_sizes=[1, 2, 4],
														
 
															+                 cm_out_ch=128,
														
 
															+                 arm_out_chs=[64, 96, 128],
														
 
															+                 seg_head_inter_chs=[64, 64, 64],
														
 
															+                 resize_mode='bilinear',
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        # backbone
														
 
															+        assert hasattr(backbone, 'feat_channels'), \
														
 
															+            "The backbone should has feat_channels."
														
 
															+        assert len(backbone.feat_channels) >= len(backbone_indices), \
														
 
															+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \
														
 
															+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        assert len(backbone.feat_channels) > max(backbone_indices), \
														
 
															+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
														
 
															+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
														
 
															+        self.backbone = backbone
														
 
															+
														
 
															+        assert len(backbone_indices) > 1, "The lenght of backbone_indices " \
														
 
															+            "should be greater than 1"
														
 
															+        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
														
 
															+        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
														
 
															+
														
 
															+        # head
														
 
															+        if len(arm_out_chs) == 1:
														
 
															+            arm_out_chs = arm_out_chs * len(backbone_indices)
														
 
															+        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
														
 
															+            "arm_out_chs and backbone_indices should be equal"
														
 
															+
														
 
															+        self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs,
														
 
															+                                        cm_bin_sizes, cm_out_ch, arm_type,
														
 
															+                                        resize_mode)
														
 
															+
														
 
															+        if len(seg_head_inter_chs) == 1:
														
 
															+            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
														
 
															+        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
														
 
															+            "seg_head_inter_chs and backbone_indices should be equal"
														
 
															+        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
														
 
															+        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
														
 
															+            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
														
 
															+
														
 
															+        # pretrained
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_hw = paddle.shape(x)[2:]
														
 
															+
														
 
															+        feats_backbone = self.backbone(x)  # [x2, x4, x8, x16, x32]
														
 
															+        assert len(feats_backbone) >= len(self.backbone_indices), \
														
 
															+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
														
 
															+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
														
 
															+
														
 
															+        feats_selected = [feats_backbone[i] for i in self.backbone_indices]
														
 
															+
														
 
															+        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]
														
 
															+
														
 
															+        if self.training:
														
 
															+            logit_list = []
														
 
															+
														
 
															+            for x, seg_head in zip(feats_head, self.seg_heads):
														
 
															+                x = seg_head(x)
														
 
															+                logit_list.append(x)
														
 
															+
														
 
															+            logit_list = [
														
 
															+                F.interpolate(
														
 
															+                    x, x_hw, mode='bilinear', align_corners=False)
														
 
															+                for x in logit_list
														
 
															+            ]
														
 
															+        else:
														
 
															+            x = self.seg_heads[0](feats_head[0])
														
 
															+            x = F.interpolate(x, x_hw, mode='bilinear', align_corners=False)
														
 
															+            logit_list = [x]
														
 
															+
														
 
															+        return logit_list
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+
														
 
															+class PPLiteSegHead(nn.Layer):
														
 
															+    """
														
 
															+    The head of PPLiteSeg.
														
 
															+
														
 
															+    Args:
														
 
															+        backbone_out_chs (List(int)): The channels of output tensors in the backbone.
														
 
															+        arm_out_chs (List(int)): The out channels of each arm module.
														
 
															+        cm_bin_sizes (List(int)): The bin size of context module.
														
 
															+        cm_out_ch (int): The output channel of the last context module.
														
 
															+        arm_type (str): The type of attention refinement module.
														
 
															+        resize_mode (str): The resize mode for the upsampling operation in decoder.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
														
 
															+                 arm_type, resize_mode):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch,
														
 
															+                                  cm_bin_sizes)
														
 
															+
														
 
															+        assert hasattr(layers,arm_type), \
														
 
															+            "Not support arm_type ({})".format(arm_type)
														
 
															+        arm_class = eval("layers." + arm_type)
														
 
															+
														
 
															+        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
														
 
															+        for i in range(len(backbone_out_chs)):
														
 
															+            low_chs = backbone_out_chs[i]
														
 
															+            high_ch = cm_out_ch if i == len(
														
 
															+                backbone_out_chs) - 1 else arm_out_chs[i + 1]
														
 
															+            out_ch = arm_out_chs[i]
														
 
															+            arm = arm_class(
														
 
															+                low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
														
 
															+            self.arm_list.append(arm)
														
 
															+
														
 
															+    def forward(self, in_feat_list):
														
 
															+        """
														
 
															+        Args:
														
 
															+            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
														
 
															+                x2, x4 and x8 are optional.
														
 
															+        Returns:
														
 
															+            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
														
 
															+                x2, x4 and x8 are optional.
														
 
															+                The length of in_feat_list and out_feat_list are the same.
														
 
															+        """
														
 
															+
														
 
															+        high_feat = self.cm(in_feat_list[-1])
														
 
															+        out_feat_list = []
														
 
															+
														
 
															+        for i in reversed(range(len(in_feat_list))):
														
 
															+            low_feat = in_feat_list[i]
														
 
															+            arm = self.arm_list[i]
														
 
															+            high_feat = arm(low_feat, high_feat)
														
 
															+            out_feat_list.insert(0, high_feat)
														
 
															+
														
 
															+        return out_feat_list
														
 
															+
														
 
															+
														
 
															+class PPContextModule(nn.Layer):
														
 
															+    """
														
 
															+    Simple Context module.
														
 
															+
														
 
															+    Args:
														
 
															+        in_channels (int): The number of input channels to pyramid pooling module.
														
 
															+        inter_channels (int): The number of inter channels to pyramid pooling module.
														
 
															+        out_channels (int): The number of output channels after pyramid pooling module.
														
 
															+        bin_sizes (tuple|list): The output sizes of the pooled feature maps.
														
 
															+        align_corners (bool): An argument of F.interpolate. It should be set to False
														
 
															+            when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 in_channels,
														
 
															+                 inter_channels,
														
 
															+                 out_channels,
														
 
															+                 bin_sizes,
														
 
															+                 align_corners=False):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.stages = nn.LayerList([
														
 
															+            self._make_stage(in_channels, inter_channels, size)
														
 
															+            for size in bin_sizes
														
 
															+        ])
														
 
															+
														
 
															+        self.conv_out = layers.ConvBNReLU(
														
 
															+            in_channels=inter_channels,
														
 
															+            out_channels=out_channels,
														
 
															+            kernel_size=3,
														
 
															+            padding=1)
														
 
															+
														
 
															+        self.align_corners = align_corners
														
 
															+
														
 
															+    def _make_stage(self, in_channels, out_channels, size):
														
 
															+        prior = nn.AdaptiveAvgPool2D(output_size=size)
														
 
															+        conv = layers.ConvBNReLU(
														
 
															+            in_channels=in_channels, out_channels=out_channels, kernel_size=1)
														
 
															+        return nn.Sequential(prior, conv)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        out = None
														
 
															+        input_shape = paddle.shape(input)[2:]
														
 
															+
														
 
															+        for stage in self.stages:
														
 
															+            x = stage(input)
														
 
															+            x = F.interpolate(
														
 
															+                x,
														
 
															+                input_shape,
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners)
														
 
															+            if out is None:
														
 
															+                out = x
														
 
															+            else:
														
 
															+                out += x
														
 
															+
														
 
															+        out = self.conv_out(out)
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class SegHead(nn.Layer):
														
 
															+    def __init__(self, in_chan, mid_chan, n_classes):
														
 
															+        super().__init__()
														
 
															+        self.conv = layers.ConvBNReLU(
														
 
															+            in_chan,
														
 
															+            mid_chan,
														
 
															+            kernel_size=3,
														
 
															+            stride=1,
														
 
															+            padding=1,
														
 
															+            bias_attr=False)
														
 
															+        self.conv_out = nn.Conv2D(
														
 
															+            mid_chan, n_classes, kernel_size=1, bias_attr=False)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.conv(x)
														
 
															+        x = self.conv_out(x)
														
 
															+        return x
														
--- a/paddlers/models/ppseg/models/pphumanseg_lite.py
+++ b/paddlers/models/ppseg/models/pphumanseg_lite.py
@@ -27,13 +27,17 @@ __all__ = ['PPHumanSegLite']
 
															 class PPHumanSegLite(nn.Layer):
														
 
															     "A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals."
														
 
															-    def __init__(self, num_classes, pretrained=None, align_corners=False):
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 in_channels=3,
														
 
															+                 pretrained=None,
														
 
															+                 align_corners=False):
														
 
															         super().__init__()
														
 
															         self.pretrained = pretrained
														
 
															         self.num_classes = num_classes
														
 
															         self.align_corners = align_corners
														
 
															-        self.conv_bn0 = _ConvBNReLU(3, 36, 3, 2, 1)
														
 
															+        self.conv_bn0 = _ConvBNReLU(in_channels, 36, 3, 2, 1)
														
 
															         self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0)
														
 
															         self.block1 = nn.Sequential(
														
--- a/paddlers/models/ppseg/models/segformer.py
+++ b/paddlers/models/ppseg/models/segformer.py
@@ -127,51 +127,3 @@ class SegFormer(nn.Layer):
 
															                 mode='bilinear',
														
 
															                 align_corners=self.align_corners)
														
 
															         ]
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B0(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B0'](),
														
 
															-        embedding_dim=256,
														
 
															-        **kwargs)
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B1(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B1'](),
														
 
															-        embedding_dim=256,
														
 
															-        **kwargs)
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B2(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B2'](),
														
 
															-        embedding_dim=768,
														
 
															-        **kwargs)
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B3(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B3'](),
														
 
															-        embedding_dim=768,
														
 
															-        **kwargs)
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B4(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B4'](),
														
 
															-        embedding_dim=768,
														
 
															-        **kwargs)
														
 
															-
														
 
															-
														
 
															-@manager.MODELS.add_component
														
 
															-def SegFormer_B5(**kwargs):
														
 
															-    return SegFormer(
														
 
															-        backbone=manager.BACKBONES['MixVisionTransformer_B5'](),
														
 
															-        embedding_dim=768,
														
 
															-        **kwargs)
														
--- a/paddlers/models/ppseg/models/segnet.py
+++ b/paddlers/models/ppseg/models/segnet.py
@@ -32,14 +32,14 @@ class SegNet(nn.Layer):
 
															         num_classes (int): The unique number of target classes.
														
 
															     """
														
 
															-    def __init__(self, num_classes, pretrained=None):
														
 
															+    def __init__(self, num_classes, in_channels=3, pretrained=None):
														
 
															         super().__init__()
														
 
															         # Encoder Module
														
 
															         self.enco1 = nn.Sequential(
														
 
															             layers.ConvBNReLU(
														
 
															-                3, 64, 3, padding=1),
														
 
															+                in_channels, 64, 3, padding=1),
														
 
															             layers.ConvBNReLU(
														
 
															                 64, 64, 3, padding=1))
														
--- a/paddlers/models/ppseg/models/sinet.py
+++ b/paddlers/models/ppseg/models/sinet.py
@@ -0,0 +1,449 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+# Refer to the original implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+
														
 
															+CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]],
														
 
															+       [[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]],
														
 
															+       [[3, 2], [3, 4]], [[3, 1], [5, 2]]]
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class SINet(nn.Layer):
														
 
															+    """
														
 
															+    The SINet implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to
														
 
															+    Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak
														
 
															+    "SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules
														
 
															+    and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099).
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The unique number of target classes.
														
 
															+        config (List, optional): The config for SINet. Defualt use the CFG.
														
 
															+        stage2_blocks (int, optional): The num of blocks in stage2. Default: 2.
														
 
															+        stage3_blocks (int, optional): The num of blocks in stage3. Default: 8.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes=2,
														
 
															+                 config=CFG,
														
 
															+                 stage2_blocks=2,
														
 
															+                 stage3_blocks=8,
														
 
															+                 in_channels=3,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        dim1 = 16
														
 
															+        dim2 = 48
														
 
															+        dim3 = 96
														
 
															+
														
 
															+        self.encoder = SINetEncoder(config, in_channels, num_classes,
														
 
															+                                    stage2_blocks, stage3_blocks)
														
 
															+
														
 
															+        self.up = nn.UpsamplingBilinear2D(scale_factor=2)
														
 
															+        self.bn_3 = nn.BatchNorm(num_classes)
														
 
															+
														
 
															+        self.level2_C = CBR(dim2, num_classes, 1, 1)
														
 
															+        self.bn_2 = nn.BatchNorm(num_classes)
														
 
															+
														
 
															+        self.classifier = nn.Sequential(
														
 
															+            nn.UpsamplingBilinear2D(scale_factor=2),
														
 
															+            nn.Conv2D(
														
 
															+                num_classes, num_classes, 3, 1, 1, bias_attr=False))
														
 
															+
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output1 = self.encoder.level1(input)  # x2
														
 
															+
														
 
															+        output2_0 = self.encoder.level2_0(output1)  # x4
														
 
															+        for i, layer in enumerate(self.encoder.level2):
														
 
															+            if i == 0:
														
 
															+                output2 = layer(output2_0)
														
 
															+            else:
														
 
															+                output2 = layer(output2)
														
 
															+        output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1))
														
 
															+
														
 
															+        output3_0 = self.encoder.level3_0(output2_cat)  # x8
														
 
															+        for i, layer in enumerate(self.encoder.level3):
														
 
															+            if i == 0:
														
 
															+                output3 = layer(output3_0)
														
 
															+            else:
														
 
															+                output3 = layer(output3)
														
 
															+        output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1))
														
 
															+        enc_final = self.encoder.classifier(output3_cat)  # x8
														
 
															+
														
 
															+        dec_stage1 = self.bn_3(self.up(enc_final))  # x4
														
 
															+        stage1_confidence = paddle.max(F.softmax(dec_stage1), axis=1)
														
 
															+        stage1_gate = (1 - stage1_confidence).unsqueeze(1)
														
 
															+
														
 
															+        dec_stage2_0 = self.level2_C(output2)  # x4
														
 
															+        dec_stage2 = self.bn_2(
														
 
															+            self.up(dec_stage2_0 * stage1_gate + dec_stage1))  # x2
														
 
															+
														
 
															+        out = self.classifier(dec_stage2)  # x
														
 
															+
														
 
															+        return [out]
														
 
															+
														
 
															+
														
 
															+def channel_shuffle(x, groups):
														
 
															+    x_shape = paddle.shape(x)
														
 
															+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
														
 
															+    num_channels = x.shape[1]
														
 
															+    channels_per_group = num_channels // groups
														
 
															+
														
 
															+    # reshape
														
 
															+    x = paddle.reshape(
														
 
															+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
														
 
															+
														
 
															+    # transpose
														
 
															+    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
														
 
															+
														
 
															+    # flatten
														
 
															+    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
														
 
															+
														
 
															+    return x
														
 
															+
														
 
															+
														
 
															+class CBR(nn.Layer):
														
 
															+    '''
														
 
															+    This class defines the convolution layer with batch normalization and PReLU activation
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, stride=1):
														
 
															+        super().__init__()
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            nIn,
														
 
															+            nOut, (kSize, kSize),
														
 
															+            stride=stride,
														
 
															+            padding=(padding, padding),
														
 
															+            bias_attr=False)
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+        self.act = nn.PReLU(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.conv(input)
														
 
															+        output = self.bn(output)
														
 
															+        output = self.act(output)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class SeparableCBR(nn.Layer):
														
 
															+    '''
														
 
															+    This class defines the convolution layer with batch normalization and PReLU activation
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, stride=1):
														
 
															+        super().__init__()
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+
														
 
															+        self.conv = nn.Sequential(
														
 
															+            nn.Conv2D(
														
 
															+                nIn,
														
 
															+                nIn, (kSize, kSize),
														
 
															+                stride=stride,
														
 
															+                padding=(padding, padding),
														
 
															+                groups=nIn,
														
 
															+                bias_attr=False),
														
 
															+            nn.Conv2D(
														
 
															+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+        self.act = nn.PReLU(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.conv(input)
														
 
															+        output = self.bn(output)
														
 
															+        output = self.act(output)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class SqueezeBlock(nn.Layer):
														
 
															+    def __init__(self, exp_size, divide=4.0):
														
 
															+        super(SqueezeBlock, self).__init__()
														
 
															+
														
 
															+        if divide > 1:
														
 
															+            self.dense = nn.Sequential(
														
 
															+                nn.Linear(exp_size, int(exp_size / divide)),
														
 
															+                nn.PReLU(int(exp_size / divide)),
														
 
															+                nn.Linear(int(exp_size / divide), exp_size),
														
 
															+                nn.PReLU(exp_size), )
														
 
															+        else:
														
 
															+            self.dense = nn.Sequential(
														
 
															+                nn.Linear(exp_size, exp_size), nn.PReLU(exp_size))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        alpha = F.adaptive_avg_pool2d(x, [1, 1])
														
 
															+        alpha = paddle.squeeze(alpha, axis=[2, 3])
														
 
															+        alpha = self.dense(alpha)
														
 
															+        alpha = paddle.unsqueeze(alpha, axis=[2, 3])
														
 
															+        out = x * alpha
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															+class SESeparableCBR(nn.Layer):
														
 
															+    '''
														
 
															+    This class defines the convolution layer with batch normalization and PReLU activation
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0):
														
 
															+        super().__init__()
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+
														
 
															+        self.conv = nn.Sequential(
														
 
															+            nn.Conv2D(
														
 
															+                nIn,
														
 
															+                nIn, (kSize, kSize),
														
 
															+                stride=stride,
														
 
															+                padding=(padding, padding),
														
 
															+                groups=nIn,
														
 
															+                bias_attr=False),
														
 
															+            SqueezeBlock(
														
 
															+                nIn, divide=divide),
														
 
															+            nn.Conv2D(
														
 
															+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
														
 
															+
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+        self.act = nn.PReLU(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.conv(input)
														
 
															+        output = self.bn(output)
														
 
															+        output = self.act(output)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class BR(nn.Layer):
														
 
															+    '''
														
 
															+    This class groups the batch normalization and PReLU activation
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nOut):
														
 
															+        super().__init__()
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+        self.act = nn.PReLU(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.bn(input)
														
 
															+        output = self.act(output)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class CB(nn.Layer):
														
 
															+    '''
														
 
															+    This class groups the convolution and batch normalization
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, stride=1):
														
 
															+        super().__init__()
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            nIn,
														
 
															+            nOut, (kSize, kSize),
														
 
															+            stride=stride,
														
 
															+            padding=(padding, padding),
														
 
															+            bias_attr=False)
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.conv(input)
														
 
															+        output = self.bn(output)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class C(nn.Layer):
														
 
															+    '''
														
 
															+    This class is for a convolutional layer.
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, stride=1, group=1):
														
 
															+        super().__init__()
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            nIn,
														
 
															+            nOut, (kSize, kSize),
														
 
															+            stride=stride,
														
 
															+            padding=(padding, padding),
														
 
															+            bias_attr=False,
														
 
															+            groups=group)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output = self.conv(input)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class S2block(nn.Layer):
														
 
															+    '''
														
 
															+    This class defines the dilated convolution.
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, kSize, avgsize):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.resolution_down = False
														
 
															+        if avgsize > 1:
														
 
															+            self.resolution_down = True
														
 
															+            self.down_res = nn.AvgPool2D(avgsize, avgsize)
														
 
															+            self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize)
														
 
															+            self.avgsize = avgsize
														
 
															+
														
 
															+        padding = int((kSize - 1) / 2)
														
 
															+        self.conv = nn.Sequential(
														
 
															+            nn.Conv2D(
														
 
															+                nIn,
														
 
															+                nIn,
														
 
															+                kernel_size=(kSize, kSize),
														
 
															+                stride=1,
														
 
															+                padding=(padding, padding),
														
 
															+                groups=nIn,
														
 
															+                bias_attr=False),
														
 
															+            nn.BatchNorm(nIn))
														
 
															+
														
 
															+        self.act_conv1x1 = nn.Sequential(
														
 
															+            nn.PReLU(nIn),
														
 
															+            nn.Conv2D(
														
 
															+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
														
 
															+
														
 
															+        self.bn = nn.BatchNorm(nOut)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        if self.resolution_down:
														
 
															+            input = self.down_res(input)
														
 
															+        output = self.conv(input)
														
 
															+
														
 
															+        output = self.act_conv1x1(output)
														
 
															+        if self.resolution_down:
														
 
															+            output = self.up_res(output)
														
 
															+        return self.bn(output)
														
 
															+
														
 
															+
														
 
															+class S2module(nn.Layer):
														
 
															+    '''
														
 
															+    This class defines the ESP block, which is based on the following principle
														
 
															+        Reduce ---> Split ---> Transform --> Merge
														
 
															+    '''
														
 
															+
														
 
															+    def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        group_n = len(config)
														
 
															+        assert group_n == 2
														
 
															+        n = int(nOut / group_n)
														
 
															+        n1 = nOut - group_n * n
														
 
															+
														
 
															+        self.c1 = C(nIn, n, 1, 1, group=group_n)
														
 
															+        # self.c1 = C(nIn, n, 1, 1)
														
 
															+
														
 
															+        for i in range(group_n):
														
 
															+            if i == 0:
														
 
															+                self.layer_0 = S2block(
														
 
															+                    n, n + n1, kSize=config[i][0], avgsize=config[i][1])
														
 
															+            else:
														
 
															+                self.layer_1 = S2block(
														
 
															+                    n, n, kSize=config[i][0], avgsize=config[i][1])
														
 
															+
														
 
															+        self.BR = BR(nOut)
														
 
															+        self.add = add
														
 
															+        self.group_n = group_n
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output1 = self.c1(input)
														
 
															+        output1 = channel_shuffle(output1, self.group_n)
														
 
															+        res_0 = self.layer_0(output1)
														
 
															+        res_1 = self.layer_1(output1)
														
 
															+        combine = paddle.concat([res_0, res_1], 1)
														
 
															+
														
 
															+        if self.add:
														
 
															+            combine = input + combine
														
 
															+        output = self.BR(combine)
														
 
															+        return output
														
 
															+
														
 
															+
														
 
															+class SINetEncoder(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 config,
														
 
															+                 in_channels=3,
														
 
															+                 num_classes=2,
														
 
															+                 stage2_blocks=2,
														
 
															+                 stage3_blocks=8):
														
 
															+        super().__init__()
														
 
															+        assert stage2_blocks == 2
														
 
															+        dim1 = 16
														
 
															+        dim2 = 48
														
 
															+        dim3 = 96
														
 
															+
														
 
															+        self.level1 = CBR(in_channels, 12, 3, 2)
														
 
															+
														
 
															+        self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1)
														
 
															+
														
 
															+        self.level2 = nn.LayerList()
														
 
															+        for i in range(0, stage2_blocks):
														
 
															+            if i == 0:
														
 
															+                self.level2.append(
														
 
															+                    S2module(
														
 
															+                        dim1, dim2, config=config[i], add=False))
														
 
															+            else:
														
 
															+                self.level2.append(S2module(dim2, dim2, config=config[i]))
														
 
															+        self.BR2 = BR(dim2 + dim1)
														
 
															+
														
 
															+        self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2)
														
 
															+        self.level3 = nn.LayerList()
														
 
															+        for i in range(0, stage3_blocks):
														
 
															+            if i == 0:
														
 
															+                self.level3.append(
														
 
															+                    S2module(
														
 
															+                        dim2, dim3, config=config[2 + i], add=False))
														
 
															+            else:
														
 
															+                self.level3.append(S2module(dim3, dim3, config=config[2 + i]))
														
 
															+        self.BR3 = BR(dim3 + dim2)
														
 
															+
														
 
															+        self.classifier = C(dim3 + dim2, num_classes, 1, 1)
														
 
															+
														
 
															+    def forward(self, input):
														
 
															+        output1 = self.level1(input)  # x2
														
 
															+
														
 
															+        output2_0 = self.level2_0(output1)  # x4
														
 
															+        for i, layer in enumerate(self.level2):
														
 
															+            if i == 0:
														
 
															+                output2 = layer(output2_0)
														
 
															+            else:
														
 
															+                output2 = layer(output2)
														
 
															+
														
 
															+        output3_0 = self.level3_0(
														
 
															+            self.BR2(paddle.concat([output2_0, output2], 1)))  # x8
														
 
															+        for i, layer in enumerate(self.level3):
														
 
															+            if i == 0:
														
 
															+                output3 = layer(output3_0)
														
 
															+            else:
														
 
															+                output3 = layer(output3)
														
 
															+
														
 
															+        output3_cat = self.BR3(paddle.concat([output3_0, output3], 1))
														
 
															+        classifier = self.classifier(output3_cat)
														
 
															+        return classifier
														
--- a/paddlers/models/ppseg/models/stdcseg.py
+++ b/paddlers/models/ppseg/models/stdcseg.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/models/topformer.py
+++ b/paddlers/models/ppseg/models/topformer.py
@@ -0,0 +1,155 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import warnings
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+from paddlers.models.ppseg.utils import utils
														
 
															+from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class TopFormer(nn.Layer):
														
 
															+    """
														
 
															+    The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to
														
 
															+    Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu,
														
 
															+    and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation." 
														
 
															+    In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
														
 
															+    pp. 12083-12093. 2022.
														
 
															+
														
 
															+    This model refers to https://github.com/hustvl/TopFormer.
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes(int,optional): The unique number of target classes.
														
 
															+        backbone(nn.Layer): Backbone network.
														
 
															+        head_use_dw (bool, optional): Whether the head use depthwise convolutions. Default: False.
														
 
															+        align_corners (bool, optional): Set the align_corners in resizing. Default: False.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 head_use_dw=False,
														
 
															+                 align_corners=False,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.backbone = backbone
														
 
															+
														
 
															+        head_in_channels = [
														
 
															+            i for i in backbone.injection_out_channels if i is not None
														
 
															+        ]
														
 
															+        self.decode_head = TopFormerHead(
														
 
															+            num_classes=num_classes,
														
 
															+            in_channels=head_in_channels,
														
 
															+            use_dw=head_use_dw,
														
 
															+            align_corners=align_corners)
														
 
															+
														
 
															+        self.align_corners = align_corners
														
 
															+        self.pretrained = pretrained
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_hw = paddle.shape(x)[2:]
														
 
															+        x = self.backbone(x)  # len=3, 1/8,1/16,1/32
														
 
															+        x = self.decode_head(x)
														
 
															+        x = F.interpolate(
														
 
															+            x, x_hw, mode='bilinear', align_corners=self.align_corners)
														
 
															+
														
 
															+        return [x]
														
 
															+
														
 
															+
														
 
															+class TopFormerHead(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 in_channels,
														
 
															+                 in_index=[0, 1, 2],
														
 
															+                 in_transform='multiple_select',
														
 
															+                 use_dw=False,
														
 
															+                 dropout_ratio=0.1,
														
 
															+                 align_corners=False):
														
 
															+        super().__init__()
														
 
															+
														
 
															+        self.in_index = in_index
														
 
															+        self.in_transform = in_transform
														
 
															+        self.align_corners = align_corners
														
 
															+
														
 
															+        self._init_inputs(in_channels, in_index, in_transform)
														
 
															+        self.linear_fuse = ConvBNAct(
														
 
															+            in_channels=self.last_channels,
														
 
															+            out_channels=self.last_channels,
														
 
															+            kernel_size=1,
														
 
															+            stride=1,
														
 
															+            groups=self.last_channels if use_dw else 1,
														
 
															+            act=nn.ReLU)
														
 
															+        self.dropout = nn.Dropout2D(dropout_ratio)
														
 
															+        self.conv_seg = nn.Conv2D(
														
 
															+            self.last_channels, num_classes, kernel_size=1)
														
 
															+
														
 
															+    def _init_inputs(self, in_channels, in_index, in_transform):
														
 
															+        assert in_transform in [None, 'resize_concat', 'multiple_select']
														
 
															+        if in_transform is not None:
														
 
															+            assert len(in_channels) == len(in_index)
														
 
															+            if in_transform == 'resize_concat':
														
 
															+                self.last_channels = sum(in_channels)
														
 
															+            else:
														
 
															+                self.last_channels = in_channels[0]
														
 
															+        else:
														
 
															+            assert isinstance(in_channels, int)
														
 
															+            assert isinstance(in_index, int)
														
 
															+            self.last_channels = in_channels
														
 
															+
														
 
															+    def _transform_inputs(self, inputs):
														
 
															+        if self.in_transform == 'resize_concat':
														
 
															+            inputs = [inputs[i] for i in self.in_index]
														
 
															+            inputs = [
														
 
															+                F.interpolate(
														
 
															+                    input_data=x,
														
 
															+                    size=paddle.shape(inputs[0])[2:],
														
 
															+                    mode='bilinear',
														
 
															+                    align_corners=self.align_corners) for x in inputs
														
 
															+            ]
														
 
															+            inputs = paddle.concat(inputs, axis=1)
														
 
															+        elif self.in_transform == 'multiple_select':
														
 
															+            inputs_tmp = [inputs[i] for i in self.in_index]
														
 
															+            inputs = inputs_tmp[0]
														
 
															+            for x in inputs_tmp[1:]:
														
 
															+                x = F.interpolate(
														
 
															+                    x,
														
 
															+                    size=paddle.shape(inputs)[2:],
														
 
															+                    mode='bilinear',
														
 
															+                    align_corners=self.align_corners)
														
 
															+                inputs += x
														
 
															+        else:
														
 
															+            inputs = inputs[self.in_index]
														
 
															+
														
 
															+        return inputs
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self._transform_inputs(x)
														
 
															+        x = self.linear_fuse(x)
														
 
															+        x = self.dropout(x)
														
 
															+        x = self.conv_seg(x)
														
 
															+        return x
														
--- a/paddlers/models/ppseg/models/u2net.py
+++ b/paddlers/models/ppseg/models/u2net.py
@@ -34,15 +34,15 @@ class U2Net(nn.Layer):
 
															     Args:

														
 
															         num_classes (int): The unique number of target classes.

														
 
															-        in_ch (int, optional): Input channels. Default: 3.

														
 
															+        in_channels (int, optional): Input channels. Default: 3.

														
 
															         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.

														
 
															     """

														
 
															-    def __init__(self, num_classes, in_ch=3, pretrained=None):

														
 
															+    def __init__(self, num_classes, in_channels=3, pretrained=None):

														
 
															         super(U2Net, self).__init__()

														
 
															-        self.stage1 = RSU7(in_ch, 32, 64)

														
 
															+        self.stage1 = RSU7(in_channels, 32, 64)

														
 
															         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

														
 
															         self.stage2 = RSU6(64, 32, 128)

														
@@ -153,10 +153,10 @@ class U2Net(nn.Layer):
 
															 class U2Netp(nn.Layer):

														
 
															     """Please Refer to U2Net above."""

														
 
															-    def __init__(self, num_classes, in_ch=3, pretrained=None):

														
 
															+    def __init__(self, num_classes, in_channels=3, pretrained=None):

														
 
															         super(U2Netp, self).__init__()

														
 
															-        self.stage1 = RSU7(in_ch, 16, 64)

														
 
															+        self.stage1 = RSU7(in_channels, 16, 64)

														
 
															         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)

														
 
															         self.stage2 = RSU6(64, 16, 64)

														
--- a/paddlers/models/ppseg/models/unet.py
+++ b/paddlers/models/ppseg/models/unet.py
@@ -36,18 +36,19 @@ class UNet(nn.Layer):
 
															             is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.  Default: False.
														
 
															         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
														
 
															             If False, use resize_bilinear. Default: False.
														
 
															+        in_channels (int, optional): The channels of input image. Default: 3.
														
 
															         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
														
 
															     """
														
 
															     def __init__(self,
														
 
															                  num_classes,
														
 
															-                 input_channel=3,
														
 
															                  align_corners=False,
														
 
															                  use_deconv=False,
														
 
															+                 in_channels=3,
														
 
															                  pretrained=None):
														
 
															         super().__init__()
														
 
															-        self.encode = Encoder(input_channel)
														
 
															+        self.encode = Encoder(in_channels)
														
 
															         self.decode = Decoder(align_corners, use_deconv=use_deconv)
														
 
															         self.cls = self.conv = nn.Conv2D(
														
 
															             in_channels=64,
														
@@ -73,12 +74,11 @@ class UNet(nn.Layer):
 
															 class Encoder(nn.Layer):
														
 
															-    def __init__(self, input_channel=3):
														
 
															+    def __init__(self, in_channels=3):
														
 
															         super().__init__()
														
 
															         self.double_conv = nn.Sequential(
														
 
															-            layers.ConvBNReLU(input_channel, 64, 3),
														
 
															-            layers.ConvBNReLU(64, 64, 3))
														
 
															+            layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3))
														
 
															         down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
														
 
															         self.down_sample_list = nn.LayerList([
														
 
															             self.down_sampling(channel[0], channel[1])
														
--- a/paddlers/models/ppseg/models/unet_plusplus.py
+++ b/paddlers/models/ppseg/models/unet_plusplus.py
@@ -31,8 +31,8 @@ class UNetPlusPlus(nn.Layer):
 
															     (https://arxiv.org/abs/1807.10165).
														
 
															     Args:
														
 
															-        in_channels (int): The channel number of input image.
														
 
															         num_classes (int): The unique number of target classes.
														
 
															+        in_channels (int, optional): The channel number of input image. Default: 3.
														
 
															         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
														
 
															             If False, use resize_bilinear. Default: False.
														
 
															         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
														
@@ -42,8 +42,8 @@ class UNetPlusPlus(nn.Layer):
 
															         """
														
 
															     def __init__(self,
														
 
															-                 in_channels,
														
 
															                  num_classes,
														
 
															+                 in_channels=3,
														
 
															                  use_deconv=False,
														
 
															                  align_corners=False,
														
 
															                  pretrained=None,
														
--- a/paddlers/models/ppseg/models/upernet.py
+++ b/paddlers/models/ppseg/models/upernet.py
@@ -0,0 +1,173 @@
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+import paddle.nn.functional as F
														
 
															+
														
 
															+from paddlers.models.ppseg import utils
														
 
															+from paddlers.models.ppseg.cvlibs import manager
														
 
															+from paddlers.models.ppseg.models import layers
														
 
															+
														
 
															+
														
 
															+@manager.MODELS.add_component
														
 
															+class UPerNet(nn.Layer):
														
 
															+    """
														
 
															+    The UPerNet implementation based on PaddlePaddle.
														
 
															+
														
 
															+    The original article refers to
														
 
															+    Tete Xiao, et al. "Unified Perceptual Parsing for Scene Understanding"
														
 
															+    (https://arxiv.org/abs/1807.10221).
														
 
															+
														
 
															+    Args:
														
 
															+        num_classes (int): The unique number of target classes.
														
 
															+        backbone (paddle.nn.Layer): Backbone network. Currently supports ResNet50/101.
														
 
															+        backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone.
														
 
															+        channels (int): The channels of inter layers. Default: 512.
														
 
															+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
														
 
															+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
														
 
															+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
														
 
															+        dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
														
 
															+        pretrained (str, optional): The path or url of pretrained model. Default: None.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 backbone,
														
 
															+                 backbone_indices,
														
 
															+                 channels=512,
														
 
															+                 enable_auxiliary_loss=False,
														
 
															+                 align_corners=False,
														
 
															+                 dropout_prob=0.1,
														
 
															+                 pretrained=None):
														
 
															+        super().__init__()
														
 
															+        self.backbone = backbone
														
 
															+        self.backbone_indices = backbone_indices
														
 
															+        self.in_channels = [
														
 
															+            self.backbone.feat_channels[i] for i in backbone_indices
														
 
															+        ]
														
 
															+        self.align_corners = align_corners
														
 
															+        self.pretrained = pretrained
														
 
															+        self.enable_auxiliary_loss = enable_auxiliary_loss
														
 
															+
														
 
															+        fpn_inplanes = [
														
 
															+            self.backbone.feat_channels[i] for i in backbone_indices
														
 
															+        ]
														
 
															+        self.head = UPerNetHead(
														
 
															+            num_classes=num_classes,
														
 
															+            fpn_inplanes=fpn_inplanes,
														
 
															+            dropout_prob=dropout_prob,
														
 
															+            channels=channels,
														
 
															+            enable_auxiliary_loss=self.enable_auxiliary_loss)
														
 
															+        self.init_weight()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        feats = self.backbone(x)
														
 
															+        feats = [feats[i] for i in self.backbone_indices]
														
 
															+        logit_list = self.head(feats)
														
 
															+        logit_list = [
														
 
															+            F.interpolate(
														
 
															+                logit,
														
 
															+                paddle.shape(x)[2:],
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners) for logit in logit_list
														
 
															+        ]
														
 
															+        return logit_list
														
 
															+
														
 
															+    def init_weight(self):
														
 
															+        if self.pretrained is not None:
														
 
															+            utils.load_entire_model(self, self.pretrained)
														
 
															+
														
 
															+
														
 
															+class UPerNetHead(nn.Layer):
														
 
															+    def __init__(self,
														
 
															+                 num_classes,
														
 
															+                 fpn_inplanes,
														
 
															+                 channels,
														
 
															+                 dropout_prob=0.1,
														
 
															+                 enable_auxiliary_loss=False,
														
 
															+                 align_corners=True):
														
 
															+        super(UPerNetHead, self).__init__()
														
 
															+        self.align_corners = align_corners
														
 
															+        self.ppm = layers.PPModule(
														
 
															+            in_channels=fpn_inplanes[-1],
														
 
															+            out_channels=channels,
														
 
															+            bin_sizes=(1, 2, 3, 6),
														
 
															+            dim_reduction=True,
														
 
															+            align_corners=True)
														
 
															+        self.enable_auxiliary_loss = enable_auxiliary_loss
														
 
															+        self.lateral_convs = nn.LayerList()
														
 
															+        self.fpn_convs = nn.LayerList()
														
 
															+
														
 
															+        for fpn_inplane in fpn_inplanes[:-1]:
														
 
															+            self.lateral_convs.append(
														
 
															+                layers.ConvBNReLU(fpn_inplane, channels, 1))
														
 
															+            self.fpn_convs.append(
														
 
															+                layers.ConvBNReLU(
														
 
															+                    channels, channels, 3, bias_attr=False))
														
 
															+
														
 
															+        if self.enable_auxiliary_loss:
														
 
															+            self.aux_head = layers.AuxLayer(
														
 
															+                fpn_inplanes[2],
														
 
															+                fpn_inplanes[2],
														
 
															+                num_classes,
														
 
															+                dropout_prob=dropout_prob)
														
 
															+
														
 
															+        self.fpn_bottleneck = layers.ConvBNReLU(
														
 
															+            len(fpn_inplanes) * channels, channels, 3, padding=1)
														
 
															+
														
 
															+        self.conv_last = nn.Sequential(
														
 
															+            layers.ConvBNReLU(
														
 
															+                len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
														
 
															+            nn.Conv2D(
														
 
															+                channels, num_classes, kernel_size=1))
														
 
															+        self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        laterals = []
														
 
															+        for i, lateral_conv in enumerate(self.lateral_convs):
														
 
															+            laterals.append(lateral_conv(inputs[i]))
														
 
															+
														
 
															+        laterals.append(self.ppm(inputs[-1]))
														
 
															+        fpn_levels = len(laterals)
														
 
															+        for i in range(fpn_levels - 1, 0, -1):
														
 
															+            prev_shape = paddle.shape(laterals[i - 1])
														
 
															+            laterals[i - 1] = laterals[i - 1] + F.interpolate(
														
 
															+                laterals[i],
														
 
															+                size=prev_shape[2:],
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners)
														
 
															+
														
 
															+        fpn_outs = []
														
 
															+        for i in range(fpn_levels - 1):
														
 
															+            fpn_outs.append(self.fpn_convs[i](laterals[i]))
														
 
															+        fpn_outs.append(laterals[-1])
														
 
															+
														
 
															+        for i in range(fpn_levels - 1, 0, -1):
														
 
															+            fpn_outs[i] = F.interpolate(
														
 
															+                fpn_outs[i],
														
 
															+                size=paddle.shape(fpn_outs[0])[2:],
														
 
															+                mode='bilinear',
														
 
															+                align_corners=self.align_corners)
														
 
															+        fuse_out = paddle.concat(fpn_outs, axis=1)
														
 
															+        x = self.fpn_bottleneck(fuse_out)
														
 
															+
														
 
															+        x = self.conv_seg(x)
														
 
															+        logits_list = [x]
														
 
															+        if self.enable_auxiliary_loss:
														
 
															+            aux_out = self.aux_head(inputs[2])
														
 
															+            logits_list.append(aux_out)
														
 
															+            return logits_list
														
 
															+        else:
														
 
															+            return logits_list
														
--- a/paddlers/models/ppseg/transforms/functional.py
+++ b/paddlers/models/ppseg/transforms/functional.py
@@ -15,7 +15,14 @@
 
															 import cv2
														
 
															 import numpy as np
														
 
															 from PIL import Image, ImageEnhance
														
 
															-from scipy.ndimage.morphology import distance_transform_edt
														
 
															+from scipy.ndimage import distance_transform_edt
														
 
															+
														
 
															+
														
 
															+def rescale_size(img_size, target_size):
														
 
															+    scale = min(
														
 
															+        max(target_size) / max(img_size), min(target_size) / min(img_size))
														
 
															+    rescaled_size = [round(i * scale) for i in img_size]
														
 
															+    return rescaled_size, scale
														
 
															 def normalize(im, mean, std):
														
--- a/paddlers/models/ppseg/transforms/transforms.py
+++ b/paddlers/models/ppseg/transforms/transforms.py
--- a/paddlers/models/ppseg/utils/__init__.py
+++ b/paddlers/models/ppseg/utils/__init__.py
@@ -19,5 +19,4 @@ from .env import seg_env, get_sys_env
 
															 from .utils import *
														
 
															 from .timer import TimeAverager, calculate_eta
														
 
															 from . import visualize
														
 
															-from .config_check import config_check
														
 
															 from .ema import EMA
														
--- a/paddlers/models/ppseg/utils/config_check.py
+++ b/paddlers/models/ppseg/utils/config_check.py
@@ -1,59 +0,0 @@
 
															-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
														
 
															-#
														
 
															-# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															-# you may not use this file except in compliance with the License.
														
 
															-# You may obtain a copy of the License at
														
 
															-#
														
 
															-#    http://www.apache.org/licenses/LICENSE-2.0
														
 
															-#
														
 
															-# Unless required by applicable law or agreed to in writing, software
														
 
															-# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															-# See the License for the specific language governing permissions and
														
 
															-# limitations under the License.
														
 
															-
														
 
															-import numpy as np
														
 
															-
														
 
															-
														
 
															-def config_check(cfg, train_dataset=None, val_dataset=None):
														
 
															-    """
														
 
															-    To check config。
														
 
															-
														
 
															-    Args:
														
 
															-        cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config.
														
 
															-        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
														
 
															-        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
														
 
															-    """
														
 
															-
														
 
															-    num_classes_check(cfg, train_dataset, val_dataset)
														
 
															-
														
 
															-
														
 
															-def num_classes_check(cfg, train_dataset, val_dataset):
														
 
															-    """"
														
 
															-    Check that the num_classes in model, train_dataset and val_dataset is consistent.
														
 
															-    """
														
 
															-    num_classes_set = set()
														
 
															-    if train_dataset and hasattr(train_dataset, 'num_classes'):
														
 
															-        num_classes_set.add(train_dataset.num_classes)
														
 
															-    if val_dataset and hasattr(val_dataset, 'num_classes'):
														
 
															-        num_classes_set.add(val_dataset.num_classes)
														
 
															-    if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None):
														
 
															-        num_classes_set.add(cfg.dic['model'].get('num_classes'))
														
 
															-    if (not cfg.train_dataset) and (not cfg.val_dataset):
														
 
															-        raise ValueError(
														
 
															-            'One of `train_dataset` or `val_dataset should be given, but there are none.'
														
 
															-        )
														
 
															-    if len(num_classes_set) == 0:
														
 
															-        raise ValueError(
														
 
															-            '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
														
 
															-        )
														
 
															-    elif len(num_classes_set) > 1:
														
 
															-        raise ValueError(
														
 
															-            '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
														
 
															-            .format(num_classes_set))
														
 
															-    else:
														
 
															-        num_classes = num_classes_set.pop()
														
 
															-        if train_dataset:
														
 
															-            train_dataset.num_classes = num_classes
														
 
															-        if val_dataset:
														
 
															-            val_dataset.num_classes = num_classes
														
--- a/paddlers/models/ppseg/utils/env/__init__.py
+++ b/paddlers/models/ppseg/utils/env/__init__.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2022  PaddlePaddle Authors. All Rights Reserved.
														
 
															+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License"
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/utils/env/seg_env.py
+++ b/paddlers/models/ppseg/utils/env/seg_env.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2022  PaddlePaddle Authors. All Rights Reserved.
														
 
															+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License"
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/utils/env/sys_env.py
+++ b/paddlers/models/ppseg/utils/env/sys_env.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
@@ -20,6 +20,7 @@ import sys
 
															 import cv2
														
 
															 import paddle
														
 
															+import paddlers.models.ppseg as ppseg
														
 
															 IS_WINDOWS = sys.platform == 'win32'
														
@@ -57,8 +58,12 @@ def _get_nvcc_info(cuda_home):
 
															     if cuda_home is not None and os.path.isdir(cuda_home):
														
 
															         try:
														
 
															             nvcc = os.path.join(cuda_home, 'bin/nvcc')
														
 
															-            nvcc = subprocess.check_output(
														
 
															-                "{} -V".format(nvcc), shell=True).decode()
														
 
															+            if not IS_WINDOWS:
														
 
															+                nvcc = subprocess.check_output(
														
 
															+                    "{} -V".format(nvcc), shell=True).decode()
														
 
															+            else:
														
 
															+                nvcc = subprocess.check_output(
														
 
															+                    "\"{}\" -V".format(nvcc), shell=True).decode()
														
 
															             nvcc = nvcc.strip().split('\n')[-1]
														
 
															         except subprocess.SubprocessError:
														
 
															             nvcc = "Not Available"
														
@@ -116,6 +121,7 @@ def get_sys_env():
 
															     except:
														
 
															         pass
														
 
															+    env_info['PaddleSeg'] = ppseg.__version__
														
 
															     env_info['PaddlePaddle'] = paddle.__version__
														
 
															     env_info['OpenCV'] = cv2.__version__
														
--- a/paddlers/models/ppseg/utils/metrics.py
+++ b/paddlers/models/ppseg/utils/metrics.py
@@ -135,37 +135,6 @@ def mean_iou(intersect_area, pred_area, label_area):
 
															     return np.array(class_iou), miou
														
 
															-def fwiou(intersect_area, pred_area, label_area):
														
 
															-    """
														
 
															-    Calculate iou.
														
 
															-
														
 
															-    Args:
														
 
															-        intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
														
 
															-        pred_area (Tensor): The prediction area on all classes.
														
 
															-        label_area (Tensor): The ground truth area on all classes.
														
 
															-
														
 
															-    Returns:
														
 
															-        np.ndarray: iou on all classes.
														
 
															-        float: Frequency Weighted iou of all classes.
														
 
															-        np.ndarray: Frequency of all classes.
														
 
															-    """
														
 
															-    intersect_area = intersect_area.numpy()
														
 
															-    pred_area = pred_area.numpy()
														
 
															-    label_area = label_area.numpy()
														
 
															-    union = pred_area + label_area - intersect_area
														
 
															-    class_iou = []
														
 
															-    for i in range(len(intersect_area)):
														
 
															-        if union[i] == 0:
														
 
															-            iou = 0
														
 
															-        else:
														
 
															-            iou = intersect_area[i] / union[i]
														
 
															-        class_iou.append(iou)
														
 
															-    fw = label_area / np.sum(label_area)
														
 
															-    fwious = np.array(fw) * np.array(class_iou)
														
 
															-    fwiou = np.sum(fwious)
														
 
															-    return np.array(class_iou), fwiou, fw
														
 
															-
														
 
															-
														
 
															 def dice(intersect_area, pred_area, label_area):
														
 
															     """
														
 
															     Calculate DICE.
														
@@ -194,6 +163,7 @@ def dice(intersect_area, pred_area, label_area):
 
															     return np.array(class_dice), mdice
														
 
															+# This function is deprecated; please use the class_measurement function instead.
														
 
															 def accuracy(intersect_area, pred_area):
														
 
															     """
														
 
															     Calculate accuracy
														
@@ -219,6 +189,38 @@ def accuracy(intersect_area, pred_area):
 
															     return np.array(class_acc), macc
														
 
															+def class_measurement(intersect_area, pred_area, label_area):
														
 
															+    """
														
 
															+    Calculate accuracy, class precision and class recall.
														
 
															+
														
 
															+    Args:
														
 
															+        intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
														
 
															+        pred_area (Tensor): The prediction area on all classes.
														
 
															+        label_area (Tensor): The ground truth area on all classes.
														
 
															+
														
 
															+    Returns:
														
 
															+        float: The mean accuracy.
														
 
															+        np.ndarray: The precision of all classes.
														
 
															+        np.ndarray: The recall of all classes.
														
 
															+    """
														
 
															+    intersect_area = intersect_area.numpy()
														
 
															+    pred_area = pred_area.numpy()
														
 
															+    label_area = label_area.numpy()
														
 
															+
														
 
															+    mean_acc = np.sum(intersect_area) / np.sum(pred_area)
														
 
															+    class_precision = []
														
 
															+    class_recall = []
														
 
															+    for i in range(len(intersect_area)):
														
 
															+        precision = 0 if pred_area[i] == 0 \
														
 
															+            else intersect_area[i] / pred_area[i]
														
 
															+        recall = 0 if label_area[i] == 0 \
														
 
															+            else intersect_area[i] / label_area[i]
														
 
															+        class_precision.append(precision)
														
 
															+        class_recall.append(recall)
														
 
															+
														
 
															+    return mean_acc, np.array(class_precision), np.array(class_recall)
														
 
															+
														
 
															+
														
 
															 def kappa(intersect_area, pred_area, label_area):
														
 
															     """
														
 
															     Calculate kappa coefficient
														
@@ -231,9 +233,9 @@ def kappa(intersect_area, pred_area, label_area):
 
															     Returns:
														
 
															         float: kappa coefficient.
														
 
															     """
														
 
															-    intersect_area = intersect_area.numpy()
														
 
															-    pred_area = pred_area.numpy()
														
 
															-    label_area = label_area.numpy()
														
 
															+    intersect_area = intersect_area.numpy().astype(np.float64)
														
 
															+    pred_area = pred_area.numpy().astype(np.float64)
														
 
															+    label_area = label_area.numpy().astype(np.float64)
														
 
															     total_area = np.sum(label_area)
														
 
															     po = np.sum(intersect_area) / total_area
														
 
															     pe = np.sum(pred_area * label_area) / (total_area * total_area)
														
--- a/paddlers/models/ppseg/utils/train_profiler.py
+++ b/paddlers/models/ppseg/utils/train_profiler.py
@@ -1,4 +1,4 @@
 
															-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
														
 
															 #
														
 
															 # Licensed under the Apache License, Version 2.0 (the "License");
														
 
															 # you may not use this file except in compliance with the License.
														
--- a/paddlers/models/ppseg/utils/utils.py
+++ b/paddlers/models/ppseg/utils/utils.py
@@ -160,6 +160,8 @@ def get_image_list(image_path):
 
															             for f in files:
														
 
															                 if '.ipynb_checkpoints' in root:
														
 
															                     continue
														
 
															+                if f.startswith('.'):
														
 
															+                    continue
														
 
															                 if os.path.splitext(f)[-1] in valid_suffix:
														
 
															                     image_list.append(os.path.join(root, f))
														
 
															     else:
														
--- a/paddlers/models/ppseg/utils/visualize.py
+++ b/paddlers/models/ppseg/utils/visualize.py
@@ -63,7 +63,7 @@ def get_pseudo_color_map(pred, color_map=None):
 
															         pred (numpy.ndarray): the origin predicted image.
														
 
															         color_map (list, optional): the palette color map. Default: None,
														
 
															             use paddleseg's default color map.
														
 
															-    
														
 
															+
														
 
															     Returns:
														
 
															         (numpy.ndarray): the pseduo image.
														
 
															     """
														
@@ -103,3 +103,41 @@ def get_color_map_list(num_classes, custom_color=None):
 
															     if custom_color:
														
 
															         color_map[:len(custom_color)] = custom_color
														
 
															     return color_map
														
 
															+
														
 
															+
														
 
															+def paste_images(image_list):
														
 
															+    """
														
 
															+    Paste all image to a image.
														
 
															+    Args:
														
 
															+        image_list (List or Tuple): The images to be pasted and their size are the same.
														
 
															+    Returns:
														
 
															+        result_img (PIL.Image): The pasted image.
														
 
															+    """
														
 
															+    assert isinstance(image_list,
														
 
															+                      (list, tuple)), "image_list should be a list or tuple"
														
 
															+    assert len(
														
 
															+        image_list) > 1, "The length of image_list should be greater than 1"
														
 
															+
														
 
															+    pil_img_list = []
														
 
															+    for img in image_list:
														
 
															+        if isinstance(img, str):
														
 
															+            assert os.path.exists(img), "The image is not existed: {}".format(
														
 
															+                img)
														
 
															+            img = PILImage.open(img)
														
 
															+            img = np.array(img)
														
 
															+        elif isinstance(img, np.ndarray):
														
 
															+            img = PILImage.fromarray(img)
														
 
															+        pil_img_list.append(img)
														
 
															+
														
 
															+    sample_img = pil_img_list[0]
														
 
															+    size = sample_img.size
														
 
															+    for img in pil_img_list:
														
 
															+        assert size == img.size, "The image size in image_list should be the same"
														
 
															+
														
 
															+    width, height = sample_img.size
														
 
															+    result_img = PILImage.new(sample_img.mode,
														
 
															+                              (width * len(pil_img_list), height))
														
 
															+    for i, img in enumerate(pil_img_list):
														
 
															+        result_img.paste(img, box=(width * i, 0))
														
 
															+
														
 
															+    return result_img
														
--- a/paddlers/rs_models/cd/losses/fccdn_loss.py
+++ b/paddlers/rs_models/cd/losses/fccdn_loss.py
@@ -43,42 +43,13 @@ class DiceLoss(nn.Layer):
 
															         return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true)
														
 
															-class MultiClassDiceLoss(nn.Layer):
														
 
															-    def __init__(
														
 
															-            self,
														
 
															-            weight,
														
 
															-            batch=True,
														
 
															-            ignore_index=-1,
														
 
															-            do_softmax=False,
														
 
															-            **kwargs, ):
														
 
															-        super(MultiClassDiceLoss, self).__init__()
														
 
															-        self.ignore_index = ignore_index
														
 
															-        self.weight = weight
														
 
															-        self.do_softmax = do_softmax
														
 
															-        self.binary_diceloss = DiceLoss(batch)
														
 
															-
														
 
															-    def forward(self, y_pred, y_true):
														
 
															-        if self.do_softmax:
														
 
															-            y_pred = paddle.nn.functional.softmax(y_pred, axis=1)
														
 
															-        y_true = F.one_hot(y_true.long(), y_pred.shape[1]).permute(0, 3, 1, 2)
														
 
															-        total_loss = 0.0
														
 
															-        tmp_i = 0.0
														
 
															-        for i in range(y_pred.shape[1]):
														
 
															-            if i != self.ignore_index:
														
 
															-                diceloss = self.binary_diceloss(y_pred[:, i, :, :],
														
 
															-                                                y_true[:, i, :, :])
														
 
															-                total_loss += paddle.multiply(diceloss, self.weight[i])
														
 
															-                tmp_i += 1.0
														
 
															-        return total_loss / tmp_i
														
 
															-
														
 
															-
														
 
															 class DiceBCELoss(nn.Layer):
														
 
															     """Binary change detection task loss"""
														
 
															     def __init__(self):
														
 
															         super(DiceBCELoss, self).__init__()
														
 
															         self.bce_loss = nn.BCELoss()
														
 
															-        self.binnary_dice = DiceLoss()
														
 
															+        self.binary_dice = DiceLoss()
														
 
															     def forward(self, scores, labels, do_sigmoid=True):
														
 
															         if len(scores.shape) > 3:
														
@@ -87,29 +58,11 @@ class DiceBCELoss(nn.Layer):
 
															             labels = labels.squeeze(1)
														
 
															         if do_sigmoid:
														
 
															             scores = paddle.nn.functional.sigmoid(scores.clone())
														
 
															-        diceloss = self.binnary_dice(scores, labels)
														
 
															+        diceloss = self.binary_dice(scores, labels)
														
 
															         bceloss = self.bce_loss(scores, labels)
														
 
															         return diceloss + bceloss
														
 
															-class McDiceBCELoss(nn.Layer):
														
 
															-    """Multi-class change detection task loss"""
														
 
															-
														
 
															-    def __init__(self, weight, do_sigmoid=True):
														
 
															-        super(McDiceBCELoss, self).__init__()
														
 
															-        self.ce_loss = nn.CrossEntropyLoss(weight)
														
 
															-        self.dice = MultiClassDiceLoss(weight, do_sigmoid)
														
 
															-
														
 
															-    def forward(self, scores, labels):
														
 
															-        if len(scores.shape) < 4:
														
 
															-            scores = scores.unsqueeze(1)
														
 
															-        if len(labels.shape) < 4:
														
 
															-            labels = labels.unsqueeze(1)
														
 
															-        diceloss = self.dice(scores, labels)
														
 
															-        bceloss = self.ce_loss(scores, labels)
														
 
															-        return diceloss + bceloss
														
 
															-
														
 
															-
														
 
															 def fccdn_ssl_loss(logits_list, labels):
														
 
															     """
														
 
															     Self-supervised learning loss for change detection.
														
@@ -160,11 +113,11 @@ def fccdn_ssl_loss(logits_list, labels):
 
															     # Seg loss
														
 
															     labels_downsample = labels_downsample.astype(paddle.float32)
														
 
															-    loss_aux = 0.2 * criterion_ssl(out1, pred_seg_post_tmp1, False)
														
 
															-    loss_aux += 0.2 * criterion_ssl(out2, pred_seg_pre_tmp1, False)
														
 
															-    loss_aux += 0.2 * criterion_ssl(
														
 
															-        out3, labels_downsample - pred_seg_post_tmp2, False)
														
 
															-    loss_aux += 0.2 * criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
														
 
															-                                    False)
														
 
															+    loss_aux = criterion_ssl(out1, pred_seg_post_tmp1, False)
														
 
															+    loss_aux += criterion_ssl(out2, pred_seg_pre_tmp1, False)
														
 
															+    loss_aux += criterion_ssl(out3, labels_downsample - pred_seg_post_tmp2,
														
 
															+                              False)
														
 
															+    loss_aux += criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
														
 
															+                              False)
														
 
															     return loss_aux
														
--- a/paddlers/rs_models/clas/__init__.py
+++ b/paddlers/rs_models/clas/__init__.py
@@ -12,4 +12,4 @@
 
															 # See the License for the specific language governing permissions and

														
 
															 # limitations under the License.

														
 
															-from .condensenet_v2 import CondenseNetV2_a, CondenseNetV2_b, CondenseNetV2_c

														
 
															+from .condensenetv2 import CondenseNetV2_A, CondenseNetV2_B, CondenseNetV2_C

														
--- a/paddlers/rs_models/clas/condensenet_v2.py
+++ b/paddlers/rs_models/clas/condensenet_v2.py
@@ -1,442 +1,442 @@
 
															-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

														
 
															-#

														
 
															-# Licensed under the Apache License, Version 2.0 (the "License");

														
 
															-# you may not use this file except in compliance with the License.

														
 
															-# You may obtain a copy of the License at

														
 
															-#

														
 
															-#     http://www.apache.org/licenses/LICENSE-2.0

														
 
															-#

														
 
															-# Unless required by applicable law or agreed to in writing, software

														
 
															-# distributed under the License is distributed on an "AS IS" BASIS,

														
 
															-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

														
 
															-# See the License for the specific language governing permissions and

														
 
															-# limitations under the License.

														
 
															-"""

														
 
															-This code is based on https://github.com/AgentMaker/Paddle-Image-Models

														
 
															-Ths copyright of AgentMaker/Paddle-Image-Models is as follows:

														
 
															-Apache License [see LICENSE for details]

														
 
															-"""

														
 
															-

														
 
															-import paddle

														
 
															-import paddle.nn as nn

														
 
															-

														
 
															-__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"]

														
 
															-

														
 
															-

														
 
															-class SELayer(nn.Layer):

														
 
															-    def __init__(self, inplanes, reduction=16):

														
 
															-        super(SELayer, self).__init__()

														
 
															-        self.avg_pool = nn.AdaptiveAvgPool2D(1)

														
 
															-        self.fc = nn.Sequential(

														
 
															-            nn.Linear(

														
 
															-                inplanes, inplanes // reduction, bias_attr=False),

														
 
															-            nn.ReLU(),

														
 
															-            nn.Linear(

														
 
															-                inplanes // reduction, inplanes, bias_attr=False),

														
 
															-            nn.Sigmoid(), )

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        b, c, _, _ = x.shape

														
 
															-        y = self.avg_pool(x).reshape((b, c))

														
 
															-        y = self.fc(y).reshape((b, c, 1, 1))

														
 
															-        return x * paddle.expand(y, shape=x.shape)

														
 
															-

														
 
															-

														
 
															-class HS(nn.Layer):

														
 
															-    def __init__(self):

														
 
															-        super(HS, self).__init__()

														
 
															-        self.relu6 = nn.ReLU6()

														
 
															-

														
 
															-    def forward(self, inputs):

														
 
															-        return inputs * self.relu6(inputs + 3) / 6

														
 
															-

														
 
															-

														
 
															-class Conv(nn.Sequential):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            in_channels,

														
 
															-            out_channels,

														
 
															-            kernel_size,

														
 
															-            stride=1,

														
 
															-            padding=0,

														
 
															-            groups=1,

														
 
															-            activation="ReLU",

														
 
															-            bn_momentum=0.9, ):

														
 
															-        super(Conv, self).__init__()

														
 
															-        self.add_sublayer(

														
 
															-            "norm", nn.BatchNorm2D(

														
 
															-                in_channels, momentum=bn_momentum))

														
 
															-        if activation == "ReLU":

														
 
															-            self.add_sublayer("activation", nn.ReLU())

														
 
															-        elif activation == "HS":

														
 
															-            self.add_sublayer("activation", HS())

														
 
															-        else:

														
 
															-            raise NotImplementedError

														
 
															-        self.add_sublayer(

														
 
															-            "conv",

														
 
															-            nn.Conv2D(

														
 
															-                in_channels,

														
 
															-                out_channels,

														
 
															-                kernel_size=kernel_size,

														
 
															-                stride=stride,

														
 
															-                padding=padding,

														
 
															-                bias_attr=False,

														
 
															-                groups=groups, ), )

														
 
															-

														
 
															-

														
 
															-def ShuffleLayer(x, groups):

														
 
															-    batchsize, num_channels, height, width = x.shape

														
 
															-    channels_per_group = num_channels // groups

														
 
															-    # Reshape

														
 
															-    x = x.reshape((batchsize, groups, channels_per_group, height, width))

														
 
															-    # Transpose

														
 
															-    x = x.transpose((0, 2, 1, 3, 4))

														
 
															-    # Reshape

														
 
															-    x = x.reshape((batchsize, groups * channels_per_group, height, width))

														
 
															-    return x

														
 
															-

														
 
															-

														
 
															-def ShuffleLayerTrans(x, groups):

														
 
															-    batchsize, num_channels, height, width = x.shape

														
 
															-    channels_per_group = num_channels // groups

														
 
															-    # Reshape

														
 
															-    x = x.reshape((batchsize, channels_per_group, groups, height, width))

														
 
															-    # Transpose

														
 
															-    x = x.transpose((0, 2, 1, 3, 4))

														
 
															-    # Reshape

														
 
															-    x = x.reshape((batchsize, channels_per_group * groups, height, width))

														
 
															-    return x

														
 
															-

														
 
															-

														
 
															-class CondenseLGC(nn.Layer):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            in_channels,

														
 
															-            out_channels,

														
 
															-            kernel_size,

														
 
															-            stride=1,

														
 
															-            padding=0,

														
 
															-            groups=1,

														
 
															-            activation="ReLU", ):

														
 
															-        super(CondenseLGC, self).__init__()

														
 
															-        self.in_channels = in_channels

														
 
															-        self.out_channels = out_channels

														
 
															-        self.groups = groups

														
 
															-        self.norm = nn.BatchNorm2D(self.in_channels)

														
 
															-        if activation == "ReLU":

														
 
															-            self.activation = nn.ReLU()

														
 
															-        elif activation == "HS":

														
 
															-            self.activation = HS()

														
 
															-        else:

														
 
															-            raise NotImplementedError

														
 
															-        self.conv = nn.Conv2D(

														
 
															-            self.in_channels,

														
 
															-            self.out_channels,

														
 
															-            kernel_size=kernel_size,

														
 
															-            stride=stride,

														
 
															-            padding=padding,

														
 
															-            groups=self.groups,

														
 
															-            bias_attr=False, )

														
 
															-        self.register_buffer(

														
 
															-            "index", paddle.zeros(

														
 
															-                (self.in_channels, ), dtype="int64"))

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        x = paddle.index_select(x, self.index, axis=1)

														
 
															-        x = self.norm(x)

														
 
															-        x = self.activation(x)

														
 
															-        x = self.conv(x)

														
 
															-        x = ShuffleLayer(x, self.groups)

														
 
															-        return x

														
 
															-

														
 
															-

														
 
															-class CondenseSFR(nn.Layer):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            in_channels,

														
 
															-            out_channels,

														
 
															-            kernel_size,

														
 
															-            stride=1,

														
 
															-            padding=0,

														
 
															-            groups=1,

														
 
															-            activation="ReLU", ):

														
 
															-        super(CondenseSFR, self).__init__()

														
 
															-        self.in_channels = in_channels

														
 
															-        self.out_channels = out_channels

														
 
															-        self.groups = groups

														
 
															-        self.norm = nn.BatchNorm2D(self.in_channels)

														
 
															-        if activation == "ReLU":

														
 
															-            self.activation = nn.ReLU()

														
 
															-        elif activation == "HS":

														
 
															-            self.activation = HS()

														
 
															-        else:

														
 
															-            raise NotImplementedError

														
 
															-        self.conv = nn.Conv2D(

														
 
															-            self.in_channels,

														
 
															-            self.out_channels,

														
 
															-            kernel_size=kernel_size,

														
 
															-            padding=padding,

														
 
															-            groups=self.groups,

														
 
															-            bias_attr=False,

														
 
															-            stride=stride, )

														
 
															-        self.register_buffer("index",

														
 
															-                             paddle.zeros(

														
 
															-                                 (self.out_channels, self.out_channels)))

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        x = self.norm(x)

														
 
															-        x = self.activation(x)

														
 
															-        x = ShuffleLayerTrans(x, self.groups)

														
 
															-        x = self.conv(x)  # SIZE: N, C, H, W

														
 
															-        N, C, H, W = x.shape

														
 
															-        x = x.reshape((N, C, H * W))

														
 
															-        x = x.transpose((0, 2, 1))  # SIZE: N, HW, C

														
 
															-        # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C

														
 
															-        x = paddle.matmul(x, self.index)

														
 
															-        x = x.transpose((0, 2, 1))  # SIZE: N, C, HW

														
 
															-        x = x.reshape((N, C, H, W))  # SIZE: N, C, HW

														
 
															-        return x

														
 
															-

														
 
															-

														
 
															-class _SFR_DenseLayer(nn.Layer):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            in_channels,

														
 
															-            growth_rate,

														
 
															-            group_1x1,

														
 
															-            group_3x3,

														
 
															-            group_trans,

														
 
															-            bottleneck,

														
 
															-            activation,

														
 
															-            use_se=False, ):

														
 
															-        super(_SFR_DenseLayer, self).__init__()

														
 
															-        self.group_1x1 = group_1x1

														
 
															-        self.group_3x3 = group_3x3

														
 
															-        self.group_trans = group_trans

														
 
															-        self.use_se = use_se

														
 
															-        # 1x1 conv i --> b*k

														
 
															-        self.conv_1 = CondenseLGC(

														
 
															-            in_channels,

														
 
															-            bottleneck * growth_rate,

														
 
															-            kernel_size=1,

														
 
															-            groups=self.group_1x1,

														
 
															-            activation=activation, )

														
 
															-        # 3x3 conv b*k --> k

														
 
															-        self.conv_2 = Conv(

														
 
															-            bottleneck * growth_rate,

														
 
															-            growth_rate,

														
 
															-            kernel_size=3,

														
 
															-            padding=1,

														
 
															-            groups=self.group_3x3,

														
 
															-            activation=activation, )

														
 
															-        # 1x1 res conv k(8-16-32)--> i (k*l)

														
 
															-        self.sfr = CondenseSFR(

														
 
															-            growth_rate,

														
 
															-            in_channels,

														
 
															-            kernel_size=1,

														
 
															-            groups=self.group_trans,

														
 
															-            activation=activation, )

														
 
															-        if self.use_se:

														
 
															-            self.se = SELayer(inplanes=growth_rate, reduction=1)

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        x_ = x

														
 
															-        x = self.conv_1(x)

														
 
															-        x = self.conv_2(x)

														
 
															-        if self.use_se:

														
 
															-            x = self.se(x)

														
 
															-        sfr_feature = self.sfr(x)

														
 
															-        y = x_ + sfr_feature

														
 
															-        return paddle.concat([y, x], 1)

														
 
															-

														
 
															-

														
 
															-class _SFR_DenseBlock(nn.Sequential):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            num_layers,

														
 
															-            in_channels,

														
 
															-            growth_rate,

														
 
															-            group_1x1,

														
 
															-            group_3x3,

														
 
															-            group_trans,

														
 
															-            bottleneck,

														
 
															-            activation,

														
 
															-            use_se, ):

														
 
															-        super(_SFR_DenseBlock, self).__init__()

														
 
															-        for i in range(num_layers):

														
 
															-            layer = _SFR_DenseLayer(

														
 
															-                in_channels + i * growth_rate,

														
 
															-                growth_rate,

														
 
															-                group_1x1,

														
 
															-                group_3x3,

														
 
															-                group_trans,

														
 
															-                bottleneck,

														
 
															-                activation,

														
 
															-                use_se, )

														
 
															-            self.add_sublayer("denselayer_%d" % (i + 1), layer)

														
 
															-

														
 
															-

														
 
															-class _Transition(nn.Layer):

														
 
															-    def __init__(self):

														
 
															-        super(_Transition, self).__init__()

														
 
															-        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        x = self.pool(x)

														
 
															-        return x

														
 
															-

														
 
															-

														
 
															-class CondenseNetV2(nn.Layer):

														
 
															-    def __init__(

														
 
															-            self,

														
 
															-            stages,

														
 
															-            growth,

														
 
															-            HS_start_block,

														
 
															-            SE_start_block,

														
 
															-            fc_channel,

														
 
															-            group_1x1,

														
 
															-            group_3x3,

														
 
															-            group_trans,

														
 
															-            bottleneck,

														
 
															-            last_se_reduction,

														
 
															-            in_channels=3,

														
 
															-            class_num=1000, ):

														
 
															-        super(CondenseNetV2, self).__init__()

														
 
															-        self.stages = stages

														
 
															-        self.growth = growth

														
 
															-        self.in_channels = in_channels

														
 
															-        self.class_num = class_num

														
 
															-        self.last_se_reduction = last_se_reduction

														
 
															-        assert len(self.stages) == len(self.growth)

														
 
															-        self.progress = 0.0

														
 
															-

														
 
															-        self.init_stride = 2

														
 
															-        self.pool_size = 7

														
 
															-

														
 
															-        self.features = nn.Sequential()

														
 
															-        # Initial nChannels should be 3

														
 
															-        self.num_features = 2 * self.growth[0]

														
 
															-        # Dense-block 1 (224x224)

														
 
															-        self.features.add_sublayer(

														
 
															-            "init_conv",

														
 
															-            nn.Conv2D(

														
 
															-                in_channels,

														
 
															-                self.num_features,

														
 
															-                kernel_size=3,

														
 
															-                stride=self.init_stride,

														
 
															-                padding=1,

														
 
															-                bias_attr=False, ), )

														
 
															-        for i in range(len(self.stages)):

														
 
															-            activation = "HS" if i >= HS_start_block else "ReLU"

														
 
															-            use_se = True if i >= SE_start_block else False

														
 
															-            # Dense-block i

														
 
															-            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,

														
 
															-                           activation, use_se)

														
 
															-

														
 
															-        self.fc = nn.Linear(self.num_features, fc_channel)

														
 
															-        self.fc_act = HS()

														
 
															-

														
 
															-        # Classifier layer

														
 
															-        if class_num > 0:

														
 
															-            self.classifier = nn.Linear(fc_channel, class_num)

														
 
															-        self._initialize()

														
 
															-

														
 
															-    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,

														
 
															-                  activation, use_se):

														
 
															-        # Check if ith is the last one

														
 
															-        last = i == len(self.stages) - 1

														
 
															-        block = _SFR_DenseBlock(

														
 
															-            num_layers=self.stages[i],

														
 
															-            in_channels=self.num_features,

														
 
															-            growth_rate=self.growth[i],

														
 
															-            group_1x1=group_1x1,

														
 
															-            group_3x3=group_3x3,

														
 
															-            group_trans=group_trans,

														
 
															-            bottleneck=bottleneck,

														
 
															-            activation=activation,

														
 
															-            use_se=use_se, )

														
 
															-        self.features.add_sublayer("denseblock_%d" % (i + 1), block)

														
 
															-        self.num_features += self.stages[i] * self.growth[i]

														
 
															-        if not last:

														
 
															-            trans = _Transition()

														
 
															-            self.features.add_sublayer("transition_%d" % (i + 1), trans)

														
 
															-        else:

														
 
															-            self.features.add_sublayer("norm_last",

														
 
															-                                       nn.BatchNorm2D(self.num_features))

														
 
															-            self.features.add_sublayer("relu_last", nn.ReLU())

														
 
															-            self.features.add_sublayer("pool_last",

														
 
															-                                       nn.AvgPool2D(self.pool_size))

														
 
															-            # if useSE:

														
 
															-            self.features.add_sublayer(

														
 
															-                "se_last",

														
 
															-                SELayer(

														
 
															-                    self.num_features, reduction=self.last_se_reduction))

														
 
															-

														
 
															-    def forward(self, x):

														
 
															-        features = self.features(x)

														
 
															-        out = features.reshape((features.shape[0], features.shape[1] *

														
 
															-                                features.shape[2] * features.shape[3]))

														
 
															-        out = self.fc(out)

														
 
															-        out = self.fc_act(out)

														
 
															-

														
 
															-        if self.class_num > 0:

														
 
															-            out = self.classifier(out)

														
 
															-

														
 
															-        return out

														
 
															-

														
 
															-    def _initialize(self):

														
 
															-        # Initialize

														
 
															-        for m in self.sublayers():

														
 
															-            if isinstance(m, nn.Conv2D):

														
 
															-                nn.initializer.KaimingNormal()(m.weight)

														
 
															-            elif isinstance(m, nn.BatchNorm2D):

														
 
															-                nn.initializer.Constant(value=1.0)(m.weight)

														
 
															-                nn.initializer.Constant(value=0.0)(m.bias)

														
 
															-

														
 
															-

														
 
															-def CondenseNetV2_a(**kwargs):

														
 
															-    model = CondenseNetV2(

														
 
															-        stages=[1, 1, 4, 6, 8],

														
 
															-        growth=[8, 8, 16, 32, 64],

														
 
															-        HS_start_block=2,

														
 
															-        SE_start_block=3,

														
 
															-        fc_channel=828,

														
 
															-        group_1x1=8,

														
 
															-        group_3x3=8,

														
 
															-        group_trans=8,

														
 
															-        bottleneck=4,

														
 
															-        last_se_reduction=16,

														
 
															-        **kwargs)

														
 
															-    return model

														
 
															-

														
 
															-

														
 
															-def CondenseNetV2_b(**kwargs):

														
 
															-    model = CondenseNetV2(

														
 
															-        stages=[2, 4, 6, 8, 6],

														
 
															-        growth=[6, 12, 24, 48, 96],

														
 
															-        HS_start_block=2,

														
 
															-        SE_start_block=3,

														
 
															-        fc_channel=1024,

														
 
															-        group_1x1=6,

														
 
															-        group_3x3=6,

														
 
															-        group_trans=6,

														
 
															-        bottleneck=4,

														
 
															-        last_se_reduction=16,

														
 
															-        **kwargs)

														
 
															-    return model

														
 
															-

														
 
															-

														
 
															-def CondenseNetV2_c(**kwargs):

														
 
															-    model = CondenseNetV2(

														
 
															-        stages=[4, 6, 8, 10, 8],

														
 
															-        growth=[8, 16, 32, 64, 128],

														
 
															-        HS_start_block=2,

														
 
															-        SE_start_block=3,

														
 
															-        fc_channel=1024,

														
 
															-        group_1x1=8,

														
 
															-        group_3x3=8,

														
 
															-        group_trans=8,

														
 
															-        bottleneck=4,

														
 
															-        last_se_reduction=16,

														
 
															-        **kwargs)

														
 
															-    return model

														
 
															+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
														
 
															+#
														
 
															+# Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+# you may not use this file except in compliance with the License.
														
 
															+# You may obtain a copy of the License at
														
 
															+#
														
 
															+#     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+#
														
 
															+# Unless required by applicable law or agreed to in writing, software
														
 
															+# distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+# See the License for the specific language governing permissions and
														
 
															+# limitations under the License.
														
 
															+"""
														
 
															+This code is based on https://github.com/AgentMaker/Paddle-Image-Models
														
 
															+The copyright of AgentMaker/Paddle-Image-Models is as follows:
														
 
															+Apache License [see LICENSE for details]
														
 
															+"""
														
 
															+
														
 
															+import paddle
														
 
															+import paddle.nn as nn
														
 
															+
														
 
															+__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"]
														
 
															+
														
 
															+
														
 
															+class SELayer(nn.Layer):
														
 
															+    def __init__(self, inplanes, reduction=16):
														
 
															+        super(SELayer, self).__init__()
														
 
															+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
														
 
															+        self.fc = nn.Sequential(
														
 
															+            nn.Linear(
														
 
															+                inplanes, inplanes // reduction, bias_attr=False),
														
 
															+            nn.ReLU(),
														
 
															+            nn.Linear(
														
 
															+                inplanes // reduction, inplanes, bias_attr=False),
														
 
															+            nn.Sigmoid(), )
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        b, c, _, _ = x.shape
														
 
															+        y = self.avg_pool(x).reshape((b, c))
														
 
															+        y = self.fc(y).reshape((b, c, 1, 1))
														
 
															+        return x * paddle.expand(y, shape=x.shape)
														
 
															+
														
 
															+
														
 
															+class HS(nn.Layer):
														
 
															+    def __init__(self):
														
 
															+        super(HS, self).__init__()
														
 
															+        self.relu6 = nn.ReLU6()
														
 
															+
														
 
															+    def forward(self, inputs):
														
 
															+        return inputs * self.relu6(inputs + 3) / 6
														
 
															+
														
 
															+
														
 
															+class Conv(nn.Sequential):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            out_channels,
														
 
															+            kernel_size,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            activation="ReLU",
														
 
															+            bn_momentum=0.9, ):
														
 
															+        super(Conv, self).__init__()
														
 
															+        self.add_sublayer(
														
 
															+            "norm", nn.BatchNorm2D(
														
 
															+                in_channels, momentum=bn_momentum))
														
 
															+        if activation == "ReLU":
														
 
															+            self.add_sublayer("activation", nn.ReLU())
														
 
															+        elif activation == "HS":
														
 
															+            self.add_sublayer("activation", HS())
														
 
															+        else:
														
 
															+            raise NotImplementedError
														
 
															+        self.add_sublayer(
														
 
															+            "conv",
														
 
															+            nn.Conv2D(
														
 
															+                in_channels,
														
 
															+                out_channels,
														
 
															+                kernel_size=kernel_size,
														
 
															+                stride=stride,
														
 
															+                padding=padding,
														
 
															+                bias_attr=False,
														
 
															+                groups=groups, ), )
														
 
															+
														
 
															+
														
 
															+def ShuffleLayer(x, groups):
														
 
															+    batchsize, num_channels, height, width = x.shape
														
 
															+    channels_per_group = num_channels // groups
														
 
															+    # Reshape
														
 
															+    x = x.reshape((batchsize, groups, channels_per_group, height, width))
														
 
															+    # Transpose
														
 
															+    x = x.transpose((0, 2, 1, 3, 4))
														
 
															+    # Reshape
														
 
															+    x = x.reshape((batchsize, groups * channels_per_group, height, width))
														
 
															+    return x
														
 
															+
														
 
															+
														
 
															+def ShuffleLayerTrans(x, groups):
														
 
															+    batchsize, num_channels, height, width = x.shape
														
 
															+    channels_per_group = num_channels // groups
														
 
															+    # Reshape
														
 
															+    x = x.reshape((batchsize, channels_per_group, groups, height, width))
														
 
															+    # Transpose
														
 
															+    x = x.transpose((0, 2, 1, 3, 4))
														
 
															+    # Reshape
														
 
															+    x = x.reshape((batchsize, channels_per_group * groups, height, width))
														
 
															+    return x
														
 
															+
														
 
															+
														
 
															+class CondenseLGC(nn.Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            out_channels,
														
 
															+            kernel_size,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            activation="ReLU", ):
														
 
															+        super(CondenseLGC, self).__init__()
														
 
															+        self.in_channels = in_channels
														
 
															+        self.out_channels = out_channels
														
 
															+        self.groups = groups
														
 
															+        self.norm = nn.BatchNorm2D(self.in_channels)
														
 
															+        if activation == "ReLU":
														
 
															+            self.activation = nn.ReLU()
														
 
															+        elif activation == "HS":
														
 
															+            self.activation = HS()
														
 
															+        else:
														
 
															+            raise NotImplementedError
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            self.in_channels,
														
 
															+            self.out_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            stride=stride,
														
 
															+            padding=padding,
														
 
															+            groups=self.groups,
														
 
															+            bias_attr=False, )
														
 
															+        self.register_buffer(
														
 
															+            "index", paddle.zeros(
														
 
															+                (self.in_channels, ), dtype="int64"))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = paddle.index_select(x, self.index, axis=1)
														
 
															+        x = self.norm(x)
														
 
															+        x = self.activation(x)
														
 
															+        x = self.conv(x)
														
 
															+        x = ShuffleLayer(x, self.groups)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class CondenseSFR(nn.Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            out_channels,
														
 
															+            kernel_size,
														
 
															+            stride=1,
														
 
															+            padding=0,
														
 
															+            groups=1,
														
 
															+            activation="ReLU", ):
														
 
															+        super(CondenseSFR, self).__init__()
														
 
															+        self.in_channels = in_channels
														
 
															+        self.out_channels = out_channels
														
 
															+        self.groups = groups
														
 
															+        self.norm = nn.BatchNorm2D(self.in_channels)
														
 
															+        if activation == "ReLU":
														
 
															+            self.activation = nn.ReLU()
														
 
															+        elif activation == "HS":
														
 
															+            self.activation = HS()
														
 
															+        else:
														
 
															+            raise NotImplementedError
														
 
															+        self.conv = nn.Conv2D(
														
 
															+            self.in_channels,
														
 
															+            self.out_channels,
														
 
															+            kernel_size=kernel_size,
														
 
															+            padding=padding,
														
 
															+            groups=self.groups,
														
 
															+            bias_attr=False,
														
 
															+            stride=stride, )
														
 
															+        self.register_buffer("index",
														
 
															+                             paddle.zeros(
														
 
															+                                 (self.out_channels, self.out_channels)))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.norm(x)
														
 
															+        x = self.activation(x)
														
 
															+        x = ShuffleLayerTrans(x, self.groups)
														
 
															+        x = self.conv(x)  # SIZE: N, C, H, W
														
 
															+        N, C, H, W = x.shape
														
 
															+        x = x.reshape((N, C, H * W))
														
 
															+        x = x.transpose((0, 2, 1))  # SIZE: N, HW, C
														
 
															+        # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
														
 
															+        x = paddle.matmul(x, self.index)
														
 
															+        x = x.transpose((0, 2, 1))  # SIZE: N, C, HW
														
 
															+        x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class _SFR_DenseLayer(nn.Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            in_channels,
														
 
															+            growth_rate,
														
 
															+            group_1x1,
														
 
															+            group_3x3,
														
 
															+            group_trans,
														
 
															+            bottleneck,
														
 
															+            activation,
														
 
															+            use_se=False, ):
														
 
															+        super(_SFR_DenseLayer, self).__init__()
														
 
															+        self.group_1x1 = group_1x1
														
 
															+        self.group_3x3 = group_3x3
														
 
															+        self.group_trans = group_trans
														
 
															+        self.use_se = use_se
														
 
															+        # 1x1 conv i --> b*k
														
 
															+        self.conv_1 = CondenseLGC(
														
 
															+            in_channels,
														
 
															+            bottleneck * growth_rate,
														
 
															+            kernel_size=1,
														
 
															+            groups=self.group_1x1,
														
 
															+            activation=activation, )
														
 
															+        # 3x3 conv b*k --> k
														
 
															+        self.conv_2 = Conv(
														
 
															+            bottleneck * growth_rate,
														
 
															+            growth_rate,
														
 
															+            kernel_size=3,
														
 
															+            padding=1,
														
 
															+            groups=self.group_3x3,
														
 
															+            activation=activation, )
														
 
															+        # 1x1 res conv k(8-16-32)--> i (k*l)
														
 
															+        self.sfr = CondenseSFR(
														
 
															+            growth_rate,
														
 
															+            in_channels,
														
 
															+            kernel_size=1,
														
 
															+            groups=self.group_trans,
														
 
															+            activation=activation, )
														
 
															+        if self.use_se:
														
 
															+            self.se = SELayer(inplanes=growth_rate, reduction=1)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x_ = x
														
 
															+        x = self.conv_1(x)
														
 
															+        x = self.conv_2(x)
														
 
															+        if self.use_se:
														
 
															+            x = self.se(x)
														
 
															+        sfr_feature = self.sfr(x)
														
 
															+        y = x_ + sfr_feature
														
 
															+        return paddle.concat([y, x], 1)
														
 
															+
														
 
															+
														
 
															+class _SFR_DenseBlock(nn.Sequential):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            num_layers,
														
 
															+            in_channels,
														
 
															+            growth_rate,
														
 
															+            group_1x1,
														
 
															+            group_3x3,
														
 
															+            group_trans,
														
 
															+            bottleneck,
														
 
															+            activation,
														
 
															+            use_se, ):
														
 
															+        super(_SFR_DenseBlock, self).__init__()
														
 
															+        for i in range(num_layers):
														
 
															+            layer = _SFR_DenseLayer(
														
 
															+                in_channels + i * growth_rate,
														
 
															+                growth_rate,
														
 
															+                group_1x1,
														
 
															+                group_3x3,
														
 
															+                group_trans,
														
 
															+                bottleneck,
														
 
															+                activation,
														
 
															+                use_se, )
														
 
															+            self.add_sublayer("denselayer_%d" % (i + 1), layer)
														
 
															+
														
 
															+
														
 
															+class _Transition(nn.Layer):
														
 
															+    def __init__(self):
														
 
															+        super(_Transition, self).__init__()
														
 
															+        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.pool(x)
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+class CondenseNetV2(nn.Layer):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            stages,
														
 
															+            growth,
														
 
															+            HS_start_block,
														
 
															+            SE_start_block,
														
 
															+            fc_channel,
														
 
															+            group_1x1,
														
 
															+            group_3x3,
														
 
															+            group_trans,
														
 
															+            bottleneck,
														
 
															+            last_se_reduction,
														
 
															+            in_channels=3,
														
 
															+            class_num=1000, ):
														
 
															+        super(CondenseNetV2, self).__init__()
														
 
															+        self.stages = stages
														
 
															+        self.growth = growth
														
 
															+        self.in_channels = in_channels
														
 
															+        self.class_num = class_num
														
 
															+        self.last_se_reduction = last_se_reduction
														
 
															+        assert len(self.stages) == len(self.growth)
														
 
															+        self.progress = 0.0
														
 
															+
														
 
															+        self.init_stride = 2
														
 
															+        self.pool_size = 7
														
 
															+
														
 
															+        self.features = nn.Sequential()
														
 
															+        # Initial nChannels should be 3
														
 
															+        self.num_features = 2 * self.growth[0]
														
 
															+        # Dense-block 1 (224x224)
														
 
															+        self.features.add_sublayer(
														
 
															+            "init_conv",
														
 
															+            nn.Conv2D(
														
 
															+                in_channels,
														
 
															+                self.num_features,
														
 
															+                kernel_size=3,
														
 
															+                stride=self.init_stride,
														
 
															+                padding=1,
														
 
															+                bias_attr=False, ), )
														
 
															+        for i in range(len(self.stages)):
														
 
															+            activation = "HS" if i >= HS_start_block else "ReLU"
														
 
															+            use_se = True if i >= SE_start_block else False
														
 
															+            # Dense-block i
														
 
															+            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
														
 
															+                           activation, use_se)
														
 
															+
														
 
															+        self.fc = nn.Linear(self.num_features, fc_channel)
														
 
															+        self.fc_act = HS()
														
 
															+
														
 
															+        # Classifier layer
														
 
															+        if class_num > 0:
														
 
															+            self.classifier = nn.Linear(fc_channel, class_num)
														
 
															+        self._initialize()
														
 
															+
														
 
															+    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
														
 
															+                  activation, use_se):
														
 
															+        # Check if ith is the last one
														
 
															+        last = i == len(self.stages) - 1
														
 
															+        block = _SFR_DenseBlock(
														
 
															+            num_layers=self.stages[i],
														
 
															+            in_channels=self.num_features,
														
 
															+            growth_rate=self.growth[i],
														
 
															+            group_1x1=group_1x1,
														
 
															+            group_3x3=group_3x3,
														
 
															+            group_trans=group_trans,
														
 
															+            bottleneck=bottleneck,
														
 
															+            activation=activation,
														
 
															+            use_se=use_se, )
														
 
															+        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
														
 
															+        self.num_features += self.stages[i] * self.growth[i]
														
 
															+        if not last:
														
 
															+            trans = _Transition()
														
 
															+            self.features.add_sublayer("transition_%d" % (i + 1), trans)
														
 
															+        else:
														
 
															+            self.features.add_sublayer("norm_last",
														
 
															+                                       nn.BatchNorm2D(self.num_features))
														
 
															+            self.features.add_sublayer("relu_last", nn.ReLU())
														
 
															+            self.features.add_sublayer("pool_last",
														
 
															+                                       nn.AvgPool2D(self.pool_size))
														
 
															+            # if useSE:
														
 
															+            self.features.add_sublayer(
														
 
															+                "se_last",
														
 
															+                SELayer(
														
 
															+                    self.num_features, reduction=self.last_se_reduction))
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        features = self.features(x)
														
 
															+        out = features.reshape((features.shape[0], features.shape[1] *
														
 
															+                                features.shape[2] * features.shape[3]))
														
 
															+        out = self.fc(out)
														
 
															+        out = self.fc_act(out)
														
 
															+
														
 
															+        if self.class_num > 0:
														
 
															+            out = self.classifier(out)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+    def _initialize(self):
														
 
															+        # Initialize
														
 
															+        for m in self.sublayers():
														
 
															+            if isinstance(m, nn.Conv2D):
														
 
															+                nn.initializer.KaimingNormal()(m.weight)
														
 
															+            elif isinstance(m, nn.BatchNorm2D):
														
 
															+                nn.initializer.Constant(value=1.0)(m.weight)
														
 
															+                nn.initializer.Constant(value=0.0)(m.bias)
														
 
															+
														
 
															+
														
 
															+def CondenseNetV2_A(**kwargs):
														
 
															+    model = CondenseNetV2(
														
 
															+        stages=[1, 1, 4, 6, 8],
														
 
															+        growth=[8, 8, 16, 32, 64],
														
 
															+        HS_start_block=2,
														
 
															+        SE_start_block=3,
														
 
															+        fc_channel=828,
														
 
															+        group_1x1=8,
														
 
															+        group_3x3=8,
														
 
															+        group_trans=8,
														
 
															+        bottleneck=4,
														
 
															+        last_se_reduction=16,
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+def CondenseNetV2_B(**kwargs):
														
 
															+    model = CondenseNetV2(
														
 
															+        stages=[2, 4, 6, 8, 6],
														
 
															+        growth=[6, 12, 24, 48, 96],
														
 
															+        HS_start_block=2,
														
 
															+        SE_start_block=3,
														
 
															+        fc_channel=1024,
														
 
															+        group_1x1=6,
														
 
															+        group_3x3=6,
														
 
															+        group_trans=6,
														
 
															+        bottleneck=4,
														
 
															+        last_se_reduction=16,
														
 
															+        **kwargs)
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+def CondenseNetV2_C(**kwargs):
														
 
															+    model = CondenseNetV2(
														
 
															+        stages=[4, 6, 8, 10, 8],
														
 
															+        growth=[8, 16, 32, 64, 128],
														
 
															+        HS_start_block=2,
														
 
															+        SE_start_block=3,
														
 
															+        fc_channel=1024,
														
 
															+        group_1x1=8,
														
 
															+        group_3x3=8,
														
 
															+        group_trans=8,
														
 
															+        bottleneck=4,
														
 
															+        last_se_reduction=16,
														
 
															+        **kwargs)
														
 
															+    return model
														
--- a/paddlers/tasks/change_detector.py
+++ b/paddlers/tasks/change_detector.py
@@ -1067,7 +1067,7 @@ class FCCDN(BaseChangeDetector):
 
															             return {
														
 
															                 'types':
														
 
															                 [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss],
														
 
															-                'coef': [1.0, 1.0]
														
 
															+                'coef': [1.0, 0.2]
														
 
															             }
														
 
															         else:
														
 
															             raise ValueError(
														
--- a/paddlers/tasks/classifier.py
+++ b/paddlers/tasks/classifier.py
@@ -34,9 +34,7 @@ from paddlers.utils.checkpoint import cls_pretrain_weights_dict
 
															 from paddlers.transforms import Resize, decode_image
														
 
															 from .base import BaseModel
														
 
															-__all__ = [
														
 
															-    "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
														
 
															-]
														
 
															+__all__ = ["ResNet50_vd", "MobileNetV3", "HRNet", "CondenseNetV2"]
														
 
															 class BaseClassifier(BaseModel):
														
@@ -600,13 +598,13 @@ class ResNet50_vd(BaseClassifier):
 
															             **params)
														
 
															-class MobileNetV3_small_x1_0(BaseClassifier):
														
 
															+class MobileNetV3(BaseClassifier):
														
 
															     def __init__(self,
														
 
															                  num_classes=2,
														
 
															                  use_mixed_loss=False,
														
 
															                  losses=None,
														
 
															                  **params):
														
 
															-        super(MobileNetV3_small_x1_0, self).__init__(
														
 
															+        super(MobileNetV3, self).__init__(
														
 
															             model_name='MobileNetV3_small_x1_0',
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
@@ -614,13 +612,13 @@ class MobileNetV3_small_x1_0(BaseClassifier):
 
															             **params)
														
 
															-class HRNet_W18_C(BaseClassifier):
														
 
															+class HRNet(BaseClassifier):
														
 
															     def __init__(self,
														
 
															                  num_classes=2,
														
 
															                  use_mixed_loss=False,
														
 
															                  losses=None,
														
 
															                  **params):
														
 
															-        super(HRNet_W18_C, self).__init__(
														
 
															+        super(HRNet, self).__init__(
														
 
															             model_name='HRNet_W18_C',
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
@@ -628,15 +626,21 @@ class HRNet_W18_C(BaseClassifier):
 
															             **params)
														
 
															-class CondenseNetV2_b(BaseClassifier):
														
 
															+class CondenseNetV2(BaseClassifier):
														
 
															     def __init__(self,
														
 
															                  num_classes=2,
														
 
															                  use_mixed_loss=False,
														
 
															                  losses=None,
														
 
															+                 in_channels=3,
														
 
															+                 arch='A',
														
 
															                  **params):
														
 
															-        super(CondenseNetV2_b, self).__init__(
														
 
															-            model_name='CondenseNetV2_b',
														
 
															+        if arch not in ('A', 'B', 'C'):
														
 
															+            raise ValueError("{} is not a supported architecture.".format(arch))
														
 
															+        model_name = 'CondenseNetV2_' + arch
														
 
															+        super(CondenseNetV2, self).__init__(
														
 
															+            model_name=model_name,
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
 
															             losses=losses,
														
 
															+            in_channels=in_channels,
														
 
															             **params)
														
--- a/paddlers/tasks/restorer.py
+++ b/paddlers/tasks/restorer.py
@@ -773,7 +773,7 @@ class LESRCNN(BaseRestorer):
 
															                  group=1,
														
 
															                  **params):
														
 
															         params.update({
														
 
															-            'scale': sr_factor,
														
 
															+            'scale': sr_factor if sr_factor is not None else 1,
														
 
															             'multi_scale': multi_scale,
														
 
															             'group': group
														
 
															         })
														
--- a/paddlers/tasks/segmenter.py
+++ b/paddlers/tasks/segmenter.py
@@ -185,14 +185,7 @@ class BaseSegmenter(BaseModel):
 
															                 )
														
 
															             losses = [getattr(seg_losses, loss)() for loss in losses]
														
 
															             loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))]
														
 
															-        if self.model_name == 'FastSCNN':
														
 
															-            loss_type *= 2
														
 
															-            loss_coef = [1.0, 0.4]
														
 
															-        elif self.model_name == 'BiSeNetV2':
														
 
															-            loss_type *= 5
														
 
															-            loss_coef = [1.0] * 5
														
 
															-        else:
														
 
															-            loss_coef = [1.0]
														
 
															+        loss_coef = [1.0]
														
 
															         losses = {'types': loss_type, 'coef': loss_coef}
														
 
															         return losses
														
@@ -761,7 +754,7 @@ class UNet(BaseSegmenter):
 
															         })
														
 
															         super(UNet, self).__init__(
														
 
															             model_name='UNet',
														
 
															-            input_channel=in_channels,
														
 
															+            in_channels=in_channels,
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
 
															             losses=losses,
														
@@ -789,7 +782,7 @@ class DeepLabV3P(BaseSegmenter):
 
															         if params.get('with_net', True):
														
 
															             with DisablePrint():
														
 
															                 backbone = getattr(ppseg.models, backbone)(
														
 
															-                    input_channel=in_channels, output_stride=output_stride)
														
 
															+                    in_channels=in_channels, output_stride=output_stride)
														
 
															         else:
														
 
															             backbone = None
														
 
															         params.update({
														
@@ -809,6 +802,7 @@ class DeepLabV3P(BaseSegmenter):
 
															 class FastSCNN(BaseSegmenter):
														
 
															     def __init__(self,
														
 
															+                 in_channels=3,
														
 
															                  num_classes=2,
														
 
															                  use_mixed_loss=False,
														
 
															                  losses=None,
														
@@ -817,14 +811,22 @@ class FastSCNN(BaseSegmenter):
 
															         params.update({'align_corners': align_corners})
														
 
															         super(FastSCNN, self).__init__(
														
 
															             model_name='FastSCNN',
														
 
															+            in_channels=in_channels,
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
 
															             losses=losses,
														
 
															             **params)
														
 
															+    def default_loss(self):
														
 
															+        losses = super(FastSCNN, self).default_loss()
														
 
															+        losses['types'] *= 2
														
 
															+        losses['coef'] = [1.0, 0.4]
														
 
															+        return losses
														
 
															+
														
 
															 class HRNet(BaseSegmenter):
														
 
															     def __init__(self,
														
 
															+                 in_channels=3,
														
 
															                  num_classes=2,
														
 
															                  width=48,
														
 
															                  use_mixed_loss=False,
														
@@ -839,7 +841,7 @@ class HRNet(BaseSegmenter):
 
															         if params.get('with_net', True):
														
 
															             with DisablePrint():
														
 
															                 backbone = getattr(ppseg.models, self.backbone_name)(
														
 
															-                    align_corners=align_corners)
														
 
															+                    in_channels=in_channels, align_corners=align_corners)
														
 
															         else:
														
 
															             backbone = None
														
@@ -855,6 +857,7 @@ class HRNet(BaseSegmenter):
 
															 class BiSeNetV2(BaseSegmenter):
														
 
															     def __init__(self,
														
 
															+                 in_channels=3,
														
 
															                  num_classes=2,
														
 
															                  use_mixed_loss=False,
														
 
															                  losses=None,
														
@@ -863,11 +866,18 @@ class BiSeNetV2(BaseSegmenter):
 
															         params.update({'align_corners': align_corners})
														
 
															         super(BiSeNetV2, self).__init__(
														
 
															             model_name='BiSeNetV2',
														
 
															+            in_channels=in_channels,
														
 
															             num_classes=num_classes,
														
 
															             use_mixed_loss=use_mixed_loss,
														
 
															             losses=losses,
														
 
															             **params)
														
 
															+    def default_loss(self):
														
 
															+        losses = super(BiSeNetV2, self).default_loss()
														
 
															+        losses['types'] *= 5
														
 
															+        losses['coef'] = [1.0] * 5
														
 
															+        return losses
														
 
															+
														
 
															 class FarSeg(BaseSegmenter):
														
 
															     def __init__(self,
														
--- a/paddlers/utils/checkpoint.py
+++ b/paddlers/utils/checkpoint.py
@@ -493,11 +493,12 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
 
															             num_params_loaded = 0
														
 
															             for k in model_state_dict:
														
 
															                 if k not in param_state_dict:
														
 
															-                    logging.warning("{} is not in pretrained model".format(k))
														
 
															+                    logging.warning("{} is not in the pretrained model.".format(
														
 
															+                        k))
														
 
															                 elif list(param_state_dict[k].shape) != list(model_state_dict[k]
														
 
															                                                              .shape):
														
 
															                     logging.warning(
														
 
															-                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
														
 
															+                        "[SKIP] Shape of parameters {} do not match. (pretrained: {} vs actual: {})"
														
 
															                         .format(k, param_state_dict[k].shape, model_state_dict[
														
 
															                             k].shape))
														
 
															                 else:
														
@@ -507,11 +508,11 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
 
															             logging.info("There are {}/{} variables loaded into {}.".format(
														
 
															                 num_params_loaded, len(model_state_dict), model_name))
														
 
															         else:
														
 
															-            raise ValueError('The pretrained model directory is not Found: {}'.
														
 
															+            raise ValueError('The pretrained model directory is not found: {}'.
														
 
															                              format(pretrain_weights))
														
 
															     else:
														
 
															         logging.info(
														
 
															-            'No pretrained model to load, {} will be trained from scratch.'.
														
 
															+            'No pretrained model to load. {} will be trained from scratch.'.
														
 
															             format(model_name))
														
--- a/test_tipc/README.md
+++ b/test_tipc/README.md
@@ -32,6 +32,7 @@
 
															 | 变化检测 | FC-Siam-conc | 支持 | - | - | - |
														
 
															 | 变化检测 | FC-Siam-diff | 支持 | - | - | - |
														
 
															 | 变化检测 | ChangeFormer | 支持 | - | - | - |
														
 
															+| 场景分类 | CondenseNet V2 | 支持 | - | - | - |
														
 
															 | 场景分类 | HRNet | 支持 | - | - | - |
														
 
															 | 场景分类 | MobileNetV3 | 支持 | - | - | - |
														
 
															 | 场景分类 | ResNet50-vd | 支持 | - | - | - |
														
@@ -43,8 +44,11 @@
 
															 | 目标检测 | PP-YOLO Tiny | 支持 | - | - | - |
														
 
															 | 目标检测 | PP-YOLOv2 | 支持 | - | - | - |
														
 
															 | 目标检测 | YOLOv3 | 支持 | - | - | - |
														
 
															+| 图像分割 | BiSeNet V2 | 支持 | - | - | - |
														
 
															 | 图像分割 | DeepLab V3+ | 支持 | - | - | - |
														
 
															 | 图像分割 | FarSeg | 支持 | - | - | - |
														
 
															+| 图像分割 | Fast-SCNN | 支持 | - | - | - |
														
 
															+| 图像分割 | HRNet | 支持 | - | - | - |
														
 
															 | 图像分割 | UNet | 支持 | - | - | - |
														
 
															 ## 3 测试工具简介
														
--- a/test_tipc/config_utils.py
+++ b/test_tipc/config_utils.py
@@ -119,6 +119,7 @@ def parse_args(*args, **kwargs):
 
															     # Global settings
														
 
															     parser.add_argument('cmd', choices=['train', 'eval'])
														
 
															     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
														
 
															+    parser.add_argument('--seed', type=int, default=None)
														
 
															     # Data
														
 
															     parser.add_argument('--datasets', type=dict, default={})
														
--- a/test_tipc/configs/cd/_base_/airchange.yaml
+++ b/test_tipc/configs/cd/_base_/airchange.yaml
@@ -1,5 +1,7 @@
 
															 # Basic configurations of AirChange dataset
														
 
															+seed: 1024
														
 
															+
														
 
															 datasets:
														
 
															     train: !Node
														
 
															         type: CDDataset
														
--- a/test_tipc/configs/cd/_base_/levircd.yaml
+++ b/test_tipc/configs/cd/_base_/levircd.yaml
@@ -1,5 +1,7 @@
 
															 # Basic configurations of LEVIR-CD dataset
														
 
															+seed: 1024
														
 
															+
														
 
															 datasets:
														
 
															     train: !Node
														
 
															         type: CDDataset
														
--- a/test_tipc/configs/cd/bit/bit.yaml
+++ b/test_tipc/configs/cd/bit/bit.yaml
@@ -1,8 +0,0 @@
 
															-# Basic configurations of BIT
														
 
															-
														
 
															-_base_: ../_base_/airchange.yaml
														
 
															-
														
 
															-save_dir: ./test_tipc/output/cd/bit/
														
 
															-
														
 
															-model: !Node
														
 
															-    type: BIT
	`@@ -0,0 +1 @@`
			`+ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef`