소스 검색

Merge pull request #40 from Bobholamovic/update_ppseg

[Feat] Update ppseg and Add CondenseNet V2
cc 3 년 전
부모
커밋
ebceda8419
100개의 파일이 변경되었으며, 7575개의 추가와 1807개의 삭제가 있습니다
  1. 1 1
      docs/apis/train.md
  2. 14 11
      docs/intro/model_zoo.md
  3. 1 0
      examples/README.md
  4. 1 0
      examples/rs_research/config_utils.py
  5. 7 0
      examples/rs_research/run_task.py
  6. 23 15
      paddlers/deploy/predictor.py
  7. 1 0
      paddlers/models/hash.txt
  8. 1 1
      paddlers/models/ppseg/__init__.py
  9. 15 92
      paddlers/models/ppseg/core/infer.py
  10. 14 16
      paddlers/models/ppseg/core/predict.py
  11. 40 31
      paddlers/models/ppseg/core/train.py
  12. 71 31
      paddlers/models/ppseg/core/val.py
  13. 176 29
      paddlers/models/ppseg/cvlibs/config.py
  14. 26 0
      paddlers/models/ppseg/cvlibs/param_init.py
  15. 1 0
      paddlers/models/ppseg/datasets/__init__.py
  16. 16 8
      paddlers/models/ppseg/datasets/ade.py
  17. 31 23
      paddlers/models/ppseg/datasets/dataset.py
  18. 135 0
      paddlers/models/ppseg/datasets/pssl.py
  19. 9 0
      paddlers/models/ppseg/models/__init__.py
  20. 3 3
      paddlers/models/ppseg/models/attention_unet.py
  21. 4 0
      paddlers/models/ppseg/models/backbones/__init__.py
  22. 318 0
      paddlers/models/ppseg/models/backbones/ghostnet.py
  23. 3 1
      paddlers/models/ppseg/models/backbones/hrnet.py
  24. 974 0
      paddlers/models/ppseg/models/backbones/lite_hrnet.py
  25. 3 3
      paddlers/models/ppseg/models/backbones/mix_transformer.py
  26. 215 114
      paddlers/models/ppseg/models/backbones/mobilenetv2.py
  27. 315 181
      paddlers/models/ppseg/models/backbones/mobilenetv3.py
  28. 4 3
      paddlers/models/ppseg/models/backbones/resnet_vd.py
  29. 315 0
      paddlers/models/ppseg/models/backbones/shufflenetv2.py
  30. 117 63
      paddlers/models/ppseg/models/backbones/stdcnet.py
  31. 4 4
      paddlers/models/ppseg/models/backbones/swin_transformer.py
  32. 716 0
      paddlers/models/ppseg/models/backbones/top_transformer.py
  33. 2 2
      paddlers/models/ppseg/models/backbones/transformer_utils.py
  34. 3 3
      paddlers/models/ppseg/models/backbones/vision_transformer.py
  35. 8 3
      paddlers/models/ppseg/models/backbones/xception_deeplab.py
  36. 10 8
      paddlers/models/ppseg/models/bisenet.py
  37. 174 0
      paddlers/models/ppseg/models/ccnet.py
  38. 403 0
      paddlers/models/ppseg/models/ddrnet.py
  39. 3 1
      paddlers/models/ppseg/models/emanet.py
  40. 3 1
      paddlers/models/ppseg/models/enet.py
  41. 13 3
      paddlers/models/ppseg/models/fast_scnn.py
  42. 1 1
      paddlers/models/ppseg/models/ginet.py
  43. 198 0
      paddlers/models/ppseg/models/glore.py
  44. 3 1
      paddlers/models/ppseg/models/hardnet.py
  45. 2 1
      paddlers/models/ppseg/models/layers/__init__.py
  46. 126 0
      paddlers/models/ppseg/models/layers/attention.py
  47. 57 0
      paddlers/models/ppseg/models/layers/layer_libs.py
  48. 285 0
      paddlers/models/ppseg/models/layers/tensor_fusion.py
  49. 133 0
      paddlers/models/ppseg/models/layers/tensor_fusion_helper.py
  50. 1 1
      paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py
  51. 1 3
      paddlers/models/ppseg/models/losses/cross_entropy_loss.py
  52. 1 1
      paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py
  53. 1 1
      paddlers/models/ppseg/models/losses/detail_aggregate_loss.py
  54. 43 22
      paddlers/models/ppseg/models/losses/dice_loss.py
  55. 97 25
      paddlers/models/ppseg/models/losses/focal_loss.py
  56. 22 0
      paddlers/models/ppseg/models/losses/l1_loss.py
  57. 12 4
      paddlers/models/ppseg/models/losses/lovasz_loss.py
  58. 1 1
      paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py
  59. 4 1
      paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py
  60. 7 5
      paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py
  61. 162 0
      paddlers/models/ppseg/models/lraspp.py
  62. 1 1
      paddlers/models/ppseg/models/mla_transformer.py
  63. 289 0
      paddlers/models/ppseg/models/mobileseg.py
  64. 1 1
      paddlers/models/ppseg/models/pointrend.py
  65. 3 13
      paddlers/models/ppseg/models/portraitnet.py
  66. 273 0
      paddlers/models/ppseg/models/pp_liteseg.py
  67. 6 2
      paddlers/models/ppseg/models/pphumanseg_lite.py
  68. 0 48
      paddlers/models/ppseg/models/segformer.py
  69. 2 2
      paddlers/models/ppseg/models/segnet.py
  70. 449 0
      paddlers/models/ppseg/models/sinet.py
  71. 1 1
      paddlers/models/ppseg/models/stdcseg.py
  72. 155 0
      paddlers/models/ppseg/models/topformer.py
  73. 5 5
      paddlers/models/ppseg/models/u2net.py
  74. 5 5
      paddlers/models/ppseg/models/unet.py
  75. 2 2
      paddlers/models/ppseg/models/unet_plusplus.py
  76. 173 0
      paddlers/models/ppseg/models/upernet.py
  77. 8 1
      paddlers/models/ppseg/transforms/functional.py
  78. 261 374
      paddlers/models/ppseg/transforms/transforms.py
  79. 0 1
      paddlers/models/ppseg/utils/__init__.py
  80. 0 59
      paddlers/models/ppseg/utils/config_check.py
  81. 1 1
      paddlers/models/ppseg/utils/env/__init__.py
  82. 1 1
      paddlers/models/ppseg/utils/env/seg_env.py
  83. 9 3
      paddlers/models/ppseg/utils/env/sys_env.py
  84. 36 34
      paddlers/models/ppseg/utils/metrics.py
  85. 1 1
      paddlers/models/ppseg/utils/train_profiler.py
  86. 2 0
      paddlers/models/ppseg/utils/utils.py
  87. 39 1
      paddlers/models/ppseg/utils/visualize.py
  88. 8 55
      paddlers/rs_models/cd/losses/fccdn_loss.py
  89. 1 1
      paddlers/rs_models/clas/__init__.py
  90. 442 442
      paddlers/rs_models/clas/condensenetv2.py
  91. 1 1
      paddlers/tasks/change_detector.py
  92. 14 10
      paddlers/tasks/classifier.py
  93. 1 1
      paddlers/tasks/restorer.py
  94. 21 11
      paddlers/tasks/segmenter.py
  95. 5 4
      paddlers/utils/checkpoint.py
  96. 4 0
      test_tipc/README.md
  97. 1 0
      test_tipc/config_utils.py
  98. 2 0
      test_tipc/configs/cd/_base_/airchange.yaml
  99. 2 0
      test_tipc/configs/cd/_base_/levircd.yaml
  100. 0 8
      test_tipc/configs/cd/bit/bit.yaml

+ 1 - 1
docs/apis/train.md

@@ -34,7 +34,7 @@
 
 
 ### 初始化`BaseSegmenter`子类对象
 ### 初始化`BaseSegmenter`子类对象
 
 
-- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数,分别表示输入通道数、输出类别数以及是否使用预置的混合损失。部分模型如`FarSeg`暂不支持对`in_channels`参数的设置。
+- 一般支持设置`in_channels`、`num_classes`以及`use_mixed_loss`参数,分别表示输入通道数、输出类别数以及是否使用预置的混合损失。
 - `use_mixed_loss`参数将在未来被弃用,因此不建议使用。
 - `use_mixed_loss`参数将在未来被弃用,因此不建议使用。
 - 可通过`losses`参数指定模型训练时使用的损失函数。`losses`需为一个字典,其中`'types'`键和`'coef'`键对应的值为两个等长的列表,分别表示损失函数对象(一个可调用对象)和损失函数的权重。例如:`losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}`在训练过程中将等价于计算如下损失函数:`1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`,其中`logits`和`labels`分别是模型输出和真值标签。
 - 可通过`losses`参数指定模型训练时使用的损失函数。`losses`需为一个字典,其中`'types'`键和`'coef'`键对应的值为两个等长的列表,分别表示损失函数对象(一个可调用对象)和损失函数的权重。例如:`losses={'types': [LossType1(), LossType2()], 'coef': [1.0, 0.5]}`在训练过程中将等价于计算如下损失函数:`1.0*LossType1()(logits, labels)+0.5*LossType2()(logits, labels)`,其中`logits`和`labels`分别是模型输出和真值标签。
 - 不同的子类支持与模型相关的输入参数,详情请参考[模型定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg)和[训练器定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmenter.py)。
 - 不同的子类支持与模型相关的输入参数,详情请参考[模型定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/rs_models/seg)和[训练器定义](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/tasks/segmenter.py)。

+ 14 - 11
docs/intro/model_zoo.md

@@ -20,18 +20,21 @@ PaddleRS目前已支持的全部模型如下(标注\*的为遥感专用模型
 | 变化检测 | \*FCCDN | 是 |
 | 变化检测 | \*FCCDN | 是 |
 | 变化检测 | \*SNUNet | 是 |
 | 变化检测 | \*SNUNet | 是 |
 | 变化检测 | \*STANet | 是 |
 | 变化检测 | \*STANet | 是 |
-| 场景分类 | CondenseNetV2 | 是 |
-| 场景分类 | HRNet |  |
-| 场景分类 | MobileNetV3 |  |
-| 场景分类 | ResNet50-vd |  |
+| 场景分类 | CondenseNet V2 | 是 |
+| 场景分类 | HRNet |  |
+| 场景分类 | MobileNetV3 |  |
+| 场景分类 | ResNet50-vd |  |
 | 图像复原 | DRN | 否 |
 | 图像复原 | DRN | 否 |
-| 图像复原 | ESRGAN |  |
+| 图像复原 | ESRGAN |  |
 | 图像复原 | LESRCNN | 否 |
 | 图像复原 | LESRCNN | 否 |
-| 目标检测 | Faster R-CNN | 是 |
-| 目标检测 | PP-YOLO | 是 |
-| 目标检测 | PP-YOLO Tiny | 是 |
-| 目标检测 | PP-YOLOv2 | 是 |
-| 目标检测 | YOLOv3 | 是 |
+| 目标检测 | Faster R-CNN | 否 |
+| 目标检测 | PP-YOLO | 否 |
+| 目标检测 | PP-YOLO Tiny | 否 |
+| 目标检测 | PP-YOLOv2 | 否 |
+| 目标检测 | YOLOv3 | 否 |
+| 图像分割 | BiSeNet V2 | 是 |
 | 图像分割 | DeepLab V3+ | 是 |
 | 图像分割 | DeepLab V3+ | 是 |
-| 图像分割 | \*FarSeg | 否 |
+| 图像分割 | \*FarSeg | 是 |
+| 图像分割 | Fast-SCNN | 是 |
+| 图像分割 | HRNet | 是 |
 | 图像分割 | UNet | 是 |
 | 图像分割 | UNet | 是 |

+ 1 - 0
examples/README.md

@@ -53,3 +53,4 @@ PaddleRS提供从科学研究到产业应用的丰富示例,希望帮助遥感
 |[【官方】第十一届 “中国软件杯”百度遥感赛项:目标检测功能](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|竞赛打榜|目标检测,比赛基线|
 |[【官方】第十一届 “中国软件杯”百度遥感赛项:目标检测功能](https://aistudio.baidu.com/aistudio/projectdetail/3792609)|古代飞|竞赛打榜|目标检测,比赛基线|
 |[【十一届软件杯】遥感解译赛道:变化检测任务——预赛第四名方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|竞赛打榜|变化检测,高分方案|
 |[【十一届软件杯】遥感解译赛道:变化检测任务——预赛第四名方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4116895)|lzzzzzm|竞赛打榜|变化检测,高分方案|
 |[【方案分享】第十一届 “中国软件杯”大学生软件设计大赛遥感解译赛道 比赛方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|竞赛打榜|变化检测,高分方案|
 |[【方案分享】第十一届 “中国软件杯”大学生软件设计大赛遥感解译赛道 比赛方案分享](https://aistudio.baidu.com/aistudio/projectdetail/4146154)|trainer|竞赛打榜|变化检测,高分方案|
+|[遥感变化检测助力信贷场景下工程进度管控](https://aistudio.baidu.com/aistudio/projectdetail/4543160)|古代飞|产业范例|变化检测,金融风控|

+ 1 - 0
examples/rs_research/config_utils.py

@@ -133,6 +133,7 @@ def parse_args(*args, **kwargs):
     # Global settings
     # Global settings
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
+    parser.add_argument('--seed', type=int, default=None)
 
 
     # Data
     # Data
     parser.add_argument('--datasets', type=dict, default={})
     parser.add_argument('--datasets', type=dict, default={})

+ 7 - 0
examples/rs_research/run_task.py

@@ -15,7 +15,9 @@
 # limitations under the License.
 # limitations under the License.
 
 
 import os
 import os
+import random
 
 
+import numpy as np
 # Import cv2 and sklearn before paddlers to solve the
 # Import cv2 and sklearn before paddlers to solve the
 # "ImportError: dlopen: cannot load any more object with static TLS" issue.
 # "ImportError: dlopen: cannot load any more object with static TLS" issue.
 import cv2
 import cv2
@@ -62,6 +64,11 @@ if __name__ == '__main__':
     cfg = parse_args()
     cfg = parse_args()
     print(format_cfg(cfg))
     print(format_cfg(cfg))
 
 
+    if cfg['seed'] is not None:
+        random.seed(cfg['seed'])
+        np.random.seed(cfg['seed'])
+        paddle.seed(cfg['seed'])
+
     # Automatically download data
     # Automatically download data
     if cfg['download_on']:
     if cfg['download_on']:
         paddlers.utils.download_and_decompress(
         paddlers.utils.download_and_decompress(

+ 23 - 15
paddlers/deploy/predictor.py

@@ -103,11 +103,11 @@ class Predictor(object):
             config.enable_use_gpu(200, gpu_id)
             config.enable_use_gpu(200, gpu_id)
             config.switch_ir_optim(True)
             config.switch_ir_optim(True)
             if use_trt:
             if use_trt:
-                if self._model.model_type == 'segmenter':
+                if self.model_type == 'segmenter':
                     logging.warning(
                     logging.warning(
                         "Semantic segmentation models do not support TensorRT acceleration, "
                         "Semantic segmentation models do not support TensorRT acceleration, "
                         "TensorRT is forcibly disabled.")
                         "TensorRT is forcibly disabled.")
-                elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
+                elif self.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
                     logging.warning(
                     logging.warning(
                         "RCNN models do not support TensorRT acceleration, "
                         "RCNN models do not support TensorRT acceleration, "
                         "TensorRT is forcibly disabled.")
                         "TensorRT is forcibly disabled.")
@@ -150,30 +150,29 @@ class Predictor(object):
     def preprocess(self, images, transforms):
     def preprocess(self, images, transforms):
         preprocessed_samples = self._model.preprocess(
         preprocessed_samples = self._model.preprocess(
             images, transforms, to_tensor=False)
             images, transforms, to_tensor=False)
-        if self._model.model_type == 'classifier':
+        if self.model_type == 'classifier':
             preprocessed_samples = {'image': preprocessed_samples[0]}
             preprocessed_samples = {'image': preprocessed_samples[0]}
-        elif self._model.model_type == 'segmenter':
+        elif self.model_type == 'segmenter':
             preprocessed_samples = {
             preprocessed_samples = {
                 'image': preprocessed_samples[0],
                 'image': preprocessed_samples[0],
                 'ori_shape': preprocessed_samples[1]
                 'ori_shape': preprocessed_samples[1]
             }
             }
-        elif self._model.model_type == 'detector':
+        elif self.model_type == 'detector':
             pass
             pass
-        elif self._model.model_type == 'change_detector':
+        elif self.model_type == 'change_detector':
             preprocessed_samples = {
             preprocessed_samples = {
                 'image': preprocessed_samples[0],
                 'image': preprocessed_samples[0],
                 'image2': preprocessed_samples[1],
                 'image2': preprocessed_samples[1],
                 'ori_shape': preprocessed_samples[2]
                 'ori_shape': preprocessed_samples[2]
             }
             }
-        elif self._model.model_type == 'restorer':
+        elif self.model_type == 'restorer':
             preprocessed_samples = {
             preprocessed_samples = {
                 'image': preprocessed_samples[0],
                 'image': preprocessed_samples[0],
                 'tar_shape': preprocessed_samples[1]
                 'tar_shape': preprocessed_samples[1]
             }
             }
         else:
         else:
             logging.error(
             logging.error(
-                "Invalid model type {}".format(self._model.model_type),
-                exit=True)
+                "Invalid model type {}".format(self.model_type), exit=True)
         return preprocessed_samples
         return preprocessed_samples
 
 
     def postprocess(self,
     def postprocess(self,
@@ -182,7 +181,7 @@ class Predictor(object):
                     ori_shape=None,
                     ori_shape=None,
                     tar_shape=None,
                     tar_shape=None,
                     transforms=None):
                     transforms=None):
-        if self._model.model_type == 'classifier':
+        if self.model_type == 'classifier':
             true_topk = min(self._model.num_classes, topk)
             true_topk = min(self._model.num_classes, topk)
             if self._model.postprocess is None:
             if self._model.postprocess is None:
                 self._model.build_postprocess_from_labels(topk)
                 self._model.build_postprocess_from_labels(topk)
@@ -198,7 +197,7 @@ class Predictor(object):
                 'scores_map': s,
                 'scores_map': s,
                 'label_names_map': n,
                 'label_names_map': n,
             } for l, s, n in zip(class_ids, scores, label_names)]
             } for l, s, n in zip(class_ids, scores, label_names)]
-        elif self._model.model_type in ('segmenter', 'change_detector'):
+        elif self.model_type in ('segmenter', 'change_detector'):
             label_map, score_map = self._model.postprocess(
             label_map, score_map = self._model.postprocess(
                 net_outputs,
                 net_outputs,
                 batch_origin_shape=ori_shape,
                 batch_origin_shape=ori_shape,
@@ -207,13 +206,13 @@ class Predictor(object):
                 'label_map': l,
                 'label_map': l,
                 'score_map': s
                 'score_map': s
             } for l, s in zip(label_map, score_map)]
             } for l, s in zip(label_map, score_map)]
-        elif self._model.model_type == 'detector':
+        elif self.model_type == 'detector':
             net_outputs = {
             net_outputs = {
                 k: v
                 k: v
                 for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
                 for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
             }
             }
             preds = self._model.postprocess(net_outputs)
             preds = self._model.postprocess(net_outputs)
-        elif self._model.model_type == 'restorer':
+        elif self.model_type == 'restorer':
             res_maps = self._model.postprocess(
             res_maps = self._model.postprocess(
                 net_outputs[0],
                 net_outputs[0],
                 batch_tar_shape=tar_shape,
                 batch_tar_shape=tar_shape,
@@ -221,8 +220,7 @@ class Predictor(object):
             preds = [{'res_map': res_map} for res_map in res_maps]
             preds = [{'res_map': res_map} for res_map in res_maps]
         else:
         else:
             logging.error(
             logging.error(
-                "Invalid model type {}.".format(self._model.model_type),
-                exit=True)
+                "Invalid model type {}.".format(self.model_type), exit=True)
 
 
         return preds
         return preds
 
 
@@ -360,6 +358,12 @@ class Predictor(object):
             batch_size (int, optional): Batch size used in inference. Defaults to 1.
             batch_size (int, optional): Batch size used in inference. Defaults to 1.
             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
         """
         """
+
+        if self.model_type not in ('segmenter', 'change_detector'):
+            raise RuntimeError(
+                "Model type is {}, which does not support inference with sliding windows.".
+                format(self.model_type))
+
         slider_predict(
         slider_predict(
             partial(
             partial(
                 self.predict, quiet=True),
                 self.predict, quiet=True),
@@ -375,3 +379,7 @@ class Predictor(object):
 
 
     def batch_predict(self, image_list, **params):
     def batch_predict(self, image_list, **params):
         return self.predict(img_file=image_list, **params)
         return self.predict(img_file=image_list, **params)
+
+    @property
+    def model_type(self):
+        return self._model.model_type

+ 1 - 0
paddlers/models/hash.txt

@@ -0,0 +1 @@
+ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef

+ 1 - 1
paddlers/models/ppseg/__init__.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 15 - 92
paddlers/models/ppseg/core/infer.py

@@ -21,88 +21,16 @@ import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 
 
-def get_reverse_list(ori_shape, transforms):
-    """
-    get reverse list of transform.
-
-    Args:
-        ori_shape (list): Origin shape of image.
-        transforms (list): List of transform.
-
-    Returns:
-        list: List of tuple, there are two format:
-            ('resize', (h, w)) The image shape before resize,
-            ('padding', (h, w)) The image shape before padding.
-    """
-    reverse_list = []
-    h, w = ori_shape[0], ori_shape[1]
-    for op in transforms:
-        if op.__class__.__name__ in ['Resize']:
-            reverse_list.append(('resize', (h, w)))
-            h, w = op.target_size[0], op.target_size[1]
-        if op.__class__.__name__ in ['ResizeByLong']:
-            reverse_list.append(('resize', (h, w)))
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            short_edge = int(round(short_edge * op.long_size / long_edge))
-            long_edge = op.long_size
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-        if op.__class__.__name__ in ['ResizeByShort']:
-            reverse_list.append(('resize', (h, w)))
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            long_edge = int(round(long_edge * op.short_size / short_edge))
-            short_edge = op.short_size
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-        if op.__class__.__name__ in ['Pad']:
-            reverse_list.append(('padding', (h, w)))
-            w, h = op.target_size[0], op.target_size[1]
-        if op.__class__.__name__ in ['PadByAspectRatio']:
-            reverse_list.append(('padding', (h, w)))
-            ratio = w / h
-            if ratio == op.aspect_ratio:
-                pass
-            elif ratio > op.aspect_ratio:
-                h = int(w / op.aspect_ratio)
-            else:
-                w = int(h * op.aspect_ratio)
-        if op.__class__.__name__ in ['LimitLong']:
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            if ((op.max_long is not None) and (long_edge > op.max_long)):
-                reverse_list.append(('resize', (h, w)))
-                long_edge = op.max_long
-                short_edge = int(round(short_edge * op.max_long / long_edge))
-            elif ((op.min_long is not None) and (long_edge < op.min_long)):
-                reverse_list.append(('resize', (h, w)))
-                long_edge = op.min_long
-                short_edge = int(round(short_edge * op.min_long / long_edge))
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-    return reverse_list
-
-
-def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
+def reverse_transform(pred, trans_info, mode='nearest'):
     """recover pred to origin shape"""
     """recover pred to origin shape"""
-    reverse_list = get_reverse_list(ori_shape, transforms)
     intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
     intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
     dtype = pred.dtype
     dtype = pred.dtype
-    for item in reverse_list[::-1]:
-        if item[0] == 'resize':
+    for item in trans_info[::-1]:
+        if isinstance(item[0], list):
+            trans_mode = item[0][0]
+        else:
+            trans_mode = item[0]
+        if trans_mode == 'resize':
             h, w = item[1][0], item[1][1]
             h, w = item[1][0], item[1][1]
             if paddle.get_device() == 'cpu' and dtype in intTypeList:
             if paddle.get_device() == 'cpu' and dtype in intTypeList:
                 pred = paddle.cast(pred, 'float32')
                 pred = paddle.cast(pred, 'float32')
@@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
                 pred = paddle.cast(pred, dtype)
                 pred = paddle.cast(pred, dtype)
             else:
             else:
                 pred = F.interpolate(pred, (h, w), mode=mode)
                 pred = F.interpolate(pred, (h, w), mode=mode)
-        elif item[0] == 'padding':
+        elif trans_mode == 'padding':
             h, w = item[1][0], item[1][1]
             h, w = item[1][0], item[1][1]
             pred = pred[:, :, 0:h, 0:w]
             pred = pred[:, :, 0:h, 0:w]
         else:
         else:
@@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
 
 
 def inference(model,
 def inference(model,
               im,
               im,
-              ori_shape=None,
-              transforms=None,
+              trans_info=None,
               is_slide=False,
               is_slide=False,
               stride=None,
               stride=None,
               crop_size=None):
               crop_size=None):
@@ -216,8 +143,7 @@ def inference(model,
     Args:
     Args:
         model (paddle.nn.Layer): model to get logits of image.
         model (paddle.nn.Layer): model to get logits of image.
         im (Tensor): the input image.
         im (Tensor): the input image.
-        ori_shape (list): Origin shape of image.
-        transforms (list): Transforms for image.
+        trans_info (list): Records of the shape changes (resize/padding) applied to the image during preprocessing. Default: None.
         is_slide (bool): Whether to infer by sliding window. Default: False.
         is_slide (bool): Whether to infer by sliding window. Default: False.
         crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
         crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
         stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
         stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
@@ -239,8 +165,8 @@ def inference(model,
         logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
         logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
     if hasattr(model, 'data_format') and model.data_format == 'NHWC':
     if hasattr(model, 'data_format') and model.data_format == 'NHWC':
         logit = logit.transpose((0, 3, 1, 2))
         logit = logit.transpose((0, 3, 1, 2))
-    if ori_shape is not None:
-        logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
+    if trans_info is not None:
+        logit = reverse_transform(logit, trans_info, mode='bilinear')
         pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
         pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
         return pred, logit
         return pred, logit
     else:
     else:
@@ -249,8 +175,7 @@ def inference(model,
 
 
 def aug_inference(model,
 def aug_inference(model,
                   im,
                   im,
-                  ori_shape,
-                  transforms,
+                  trans_info,
                   scales=1.0,
                   scales=1.0,
                   flip_horizontal=False,
                   flip_horizontal=False,
                   flip_vertical=False,
                   flip_vertical=False,
@@ -263,8 +188,7 @@ def aug_inference(model,
     Args:
     Args:
         model (paddle.nn.Layer): model to get logits of image.
         model (paddle.nn.Layer): model to get logits of image.
         im (Tensor): the input image.
         im (Tensor): the input image.
-        ori_shape (list): Origin shape of image.
-        transforms (list): Transforms for image.
+        trans_info (list): Records of the shape changes (resize/padding) applied to the image during preprocessing.
         scales (float|tuple|list):  Scales for resize. Default: 1.
         scales (float|tuple|list):  Scales for resize. Default: 1.
         flip_horizontal (bool): Whether to flip horizontally. Default: False.
         flip_horizontal (bool): Whether to flip horizontally. Default: False.
         flip_vertical (bool): Whether to flip vertically. Default: False.
         flip_vertical (bool): Whether to flip vertically. Default: False.
@@ -302,8 +226,7 @@ def aug_inference(model,
             logit = F.softmax(logit, axis=1)
             logit = F.softmax(logit, axis=1)
             final_logit = final_logit + logit
             final_logit = final_logit + logit
 
 
-    final_logit = reverse_transform(
-        final_logit, ori_shape, transforms, mode='bilinear')
+    final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
     pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
     pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
 
 
     return pred, final_logit
     return pred, final_logit

+ 14 - 16
paddlers/models/ppseg/core/predict.py

@@ -36,6 +36,15 @@ def partition_list(arr, m):
     return [arr[i:i + n] for i in range(0, len(arr), n)]
     return [arr[i:i + n] for i in range(0, len(arr), n)]
 
 
 
 
+def preprocess(im_path, transforms):
+    data = {}
+    data['img'] = im_path
+    data = transforms(data)
+    data['img'] = data['img'][np.newaxis, ...]
+    data['img'] = paddle.to_tensor(data['img'])
+    return data
+
+
 def predict(model,
 def predict(model,
             model_path,
             model_path,
             transforms,
             transforms,
@@ -89,18 +98,13 @@ def predict(model,
     color_map = visualize.get_color_map_list(256, custom_color=custom_color)
     color_map = visualize.get_color_map_list(256, custom_color=custom_color)
     with paddle.no_grad():
     with paddle.no_grad():
         for i, im_path in enumerate(img_lists[local_rank]):
         for i, im_path in enumerate(img_lists[local_rank]):
-            im = cv2.imread(im_path)
-            ori_shape = im.shape[:2]
-            im, _ = transforms(im)
-            im = im[np.newaxis, ...]
-            im = paddle.to_tensor(im)
+            data = preprocess(im_path, transforms)
 
 
             if aug_pred:
             if aug_pred:
                 pred, _ = infer.aug_inference(
                 pred, _ = infer.aug_inference(
                     model,
                     model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=transforms.transforms,
+                    data['img'],
+                    trans_info=data['trans_info'],
                     scales=scales,
                     scales=scales,
                     flip_horizontal=flip_horizontal,
                     flip_horizontal=flip_horizontal,
                     flip_vertical=flip_vertical,
                     flip_vertical=flip_vertical,
@@ -110,9 +114,8 @@ def predict(model,
             else:
             else:
                 pred, _ = infer.inference(
                 pred, _ = infer.inference(
                     model,
                     model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=transforms.transforms,
+                    data['img'],
+                    trans_info=data['trans_info'],
                     is_slide=is_slide,
                     is_slide=is_slide,
                     stride=stride,
                     stride=stride,
                     crop_size=crop_size)
                     crop_size=crop_size)
@@ -141,9 +144,4 @@ def predict(model,
             mkdir(pred_saved_path)
             mkdir(pred_saved_path)
             pred_mask.save(pred_saved_path)
             pred_mask.save(pred_saved_path)
 
 
-            # pred_im = utils.visualize(im_path, pred, weight=0.0)
-            # pred_saved_path = os.path.join(pred_saved_dir, im_file)
-            # mkdir(pred_saved_path)
-            # cv2.imwrite(pred_saved_path, pred_im)
-
             progbar_pred.update(i + 1)
             progbar_pred.update(i + 1)

+ 40 - 31
paddlers/models/ppseg/core/train.py

@@ -35,17 +35,15 @@ def check_logits_losses(logits_list, losses):
             .format(len_logits, len_losses))
             .format(len_logits, len_losses))
 
 
 
 
-def loss_computation(logits_list, labels, losses, edges=None):
+def loss_computation(logits_list, labels, edges, losses):
     check_logits_losses(logits_list, losses)
     check_logits_losses(logits_list, losses)
     loss_list = []
     loss_list = []
     for i in range(len(logits_list)):
     for i in range(len(logits_list)):
         logits = logits_list[i]
         logits = logits_list[i]
         loss_i = losses['types'][i]
         loss_i = losses['types'][i]
         coef_i = losses['coef'][i]
         coef_i = losses['coef'][i]
-
-        if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
-                                         ) and loss_i.edge_label:
-            # If use edges as labels According to loss type.
+        if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
+            # Use edges as labels, according to the loss type.
             loss_list.append(coef_i * loss_i(logits, edges))
             loss_list.append(coef_i * loss_i(logits, edges))
         elif loss_i.__class__.__name__ == 'MixedLoss':
         elif loss_i.__class__.__name__ == 'MixedLoss':
             mixed_loss_list = loss_i(logits, labels)
             mixed_loss_list = loss_i(logits, labels)
@@ -75,13 +73,14 @@ def train(model,
           keep_checkpoint_max=5,
           keep_checkpoint_max=5,
           test_config=None,
           test_config=None,
           precision='fp32',
           precision='fp32',
+          amp_level='O1',
           profiler_options=None,
           profiler_options=None,
           to_static_training=False):
           to_static_training=False):
     """
     """
     Launch training.
     Launch training.
 
 
     Args:
     Args:
-        model(nn.Layer): A sementic segmentation model.
+        model(nn.Layer): A semantic segmentation model.
         train_dataset (paddle.io.Dataset): Used to read and process training datasets.
         train_dataset (paddle.io.Dataset): Used to read and process training datasets.
         val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
         val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
         optimizer (paddle.optimizer.Optimizer): The optimizer.
         optimizer (paddle.optimizer.Optimizer): The optimizer.
@@ -98,6 +97,9 @@ def train(model,
         keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
         keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
         test_config(dict, optional): Evaluation config.
         test_config(dict, optional): Evaluation config.
         precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
         precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
+        amp_level (str, optional): Auto mixed precision level. Accepted values are "O1" and "O2". O1 represents mixed precision:
+            the input data type of each operator is cast according to white_list and black_list. O2 represents pure fp16: all operator
+            parameters and input data are cast to fp16, except for operators in black_list, operators without an fp16 kernel, and batchnorm. Default: "O1".
         profiler_options (str, optional): The option of train profiler.
         profiler_options (str, optional): The option of train profiler.
         to_static_training (bool, optional): Whether to use @to_static for training.
         to_static_training (bool, optional): Whether to use @to_static for training.
     """
     """
@@ -112,7 +114,18 @@ def train(model,
     if not os.path.isdir(save_dir):
     if not os.path.isdir(save_dir):
         if os.path.exists(save_dir):
         if os.path.exists(save_dir):
             os.remove(save_dir)
             os.remove(save_dir)
-        os.makedirs(save_dir)
+        os.makedirs(save_dir, exist_ok=True)
+
+    # use amp
+    if precision == 'fp16':
+        logger.info('use AMP to train. AMP level = {}'.format(amp_level))
+        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
+        if amp_level == 'O2':
+            model, optimizer = paddle.amp.decorate(
+                models=model,
+                optimizers=optimizer,
+                level='O2',
+                save_dtype='float32')
 
 
     if nranks > 1:
     if nranks > 1:
         paddle.distributed.fleet.init(is_collective=True)
         paddle.distributed.fleet.init(is_collective=True)
@@ -130,18 +143,13 @@ def train(model,
         return_list=True,
         return_list=True,
         worker_init_fn=worker_init_fn, )
         worker_init_fn=worker_init_fn, )
 
 
-    # use amp
-    if precision == 'fp16':
-        logger.info('use amp to train')
-        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
-
     if use_vdl:
     if use_vdl:
         from visualdl import LogWriter
         from visualdl import LogWriter
         log_writer = LogWriter(save_dir)
         log_writer = LogWriter(save_dir)
 
 
     if to_static_training:
     if to_static_training:
         model = paddle.jit.to_static(model)
         model = paddle.jit.to_static(model)
-        logger.info("Successfully to apply @to_static")
+        logger.info("Successfully applied @to_static")
 
 
     avg_loss = 0.0
     avg_loss = 0.0
     avg_loss_list = []
     avg_loss_list = []
@@ -164,30 +172,29 @@ def train(model,
                 else:
                 else:
                     break
                     break
             reader_cost_averager.record(time.time() - batch_start)
             reader_cost_averager.record(time.time() - batch_start)
-            images = data[0]
-            labels = data[1].astype('int64')
+            images = data['img']
+            labels = data['label'].astype('int64')
             edges = None
             edges = None
-            if len(data) == 3:
-                edges = data[2].astype('int64')
+            if 'edge' in data.keys():
+                edges = data['edge'].astype('int64')
             if hasattr(model, 'data_format') and model.data_format == 'NHWC':
             if hasattr(model, 'data_format') and model.data_format == 'NHWC':
                 images = images.transpose((0, 2, 3, 1))
                 images = images.transpose((0, 2, 3, 1))
 
 
             if precision == 'fp16':
             if precision == 'fp16':
                 with paddle.amp.auto_cast(
                 with paddle.amp.auto_cast(
+                        level=amp_level,
                         enable=True,
                         enable=True,
                         custom_white_list={
                         custom_white_list={
                             "elementwise_add", "batch_norm", "sync_batch_norm"
                             "elementwise_add", "batch_norm", "sync_batch_norm"
                         },
                         },
                         custom_black_list={'bilinear_interp_v2'}):
                         custom_black_list={'bilinear_interp_v2'}):
-                    if nranks > 1:
-                        logits_list = ddp_model(images)
-                    else:
-                        logits_list = model(images)
+                    logits_list = ddp_model(images) if nranks > 1 else model(
+                        images)
                     loss_list = loss_computation(
                     loss_list = loss_computation(
                         logits_list=logits_list,
                         logits_list=logits_list,
                         labels=labels,
                         labels=labels,
-                        losses=losses,
-                        edges=edges)
+                        edges=edges,
+                        losses=losses)
                     loss = sum(loss_list)
                     loss = sum(loss_list)
 
 
                 scaled = scaler.scale(loss)  # scale the loss
                 scaled = scaler.scale(loss)  # scale the loss
@@ -197,15 +204,12 @@ def train(model,
                 else:
                 else:
                     scaler.minimize(optimizer, scaled)  # update parameters
                     scaler.minimize(optimizer, scaled)  # update parameters
             else:
             else:
-                if nranks > 1:
-                    logits_list = ddp_model(images)
-                else:
-                    logits_list = model(images)
+                logits_list = ddp_model(images) if nranks > 1 else model(images)
                 loss_list = loss_computation(
                 loss_list = loss_computation(
                     logits_list=logits_list,
                     logits_list=logits_list,
                     labels=labels,
                     labels=labels,
-                    losses=losses,
-                    edges=edges)
+                    edges=edges,
+                    losses=losses)
                 loss = sum(loss_list)
                 loss = sum(loss_list)
                 loss.backward()
                 loss.backward()
                 # If the optimizer is ReduceOnPlateau, the loss is the one which has been passed into step.
                 # If the optimizer is ReduceOnPlateau, the loss is the one which has been passed into step.
@@ -278,7 +282,12 @@ def train(model,
                     test_config = {}
                     test_config = {}
 
 
                 mean_iou, acc, _, _, _ = evaluate(
                 mean_iou, acc, _, _, _ = evaluate(
-                    model, val_dataset, num_workers=num_workers, **test_config)
+                    model,
+                    val_dataset,
+                    num_workers=num_workers,
+                    precision=precision,
+                    amp_level=amp_level,
+                    **test_config)
 
 
                 model.train()
                 model.train()
 
 
@@ -314,7 +323,7 @@ def train(model,
             batch_start = time.time()
             batch_start = time.time()
 
 
     # Calculate flops.
     # Calculate flops.
-    if local_rank == 0:
+    if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
         _, c, h, w = images.shape
         _, c, h, w = images.shape
         _ = paddle.flops(
         _ = paddle.flops(
             model, [1, c, h, w],
             model, [1, c, h, w],

+ 71 - 31
paddlers/models/ppseg/core/val.py

@@ -34,6 +34,8 @@ def evaluate(model,
              is_slide=False,
              is_slide=False,
              stride=None,
              stride=None,
              crop_size=None,
              crop_size=None,
+             precision='fp32',
+             amp_level='O1',
              num_workers=0,
              num_workers=0,
              print_detail=True,
              print_detail=True,
              auc_roc=False):
              auc_roc=False):
@@ -41,7 +43,7 @@ def evaluate(model,
     Launch evaluation.
     Launch evaluation.
 
 
     Args:
     Args:
-        model(nn.Layer): A sementic segmentation model.
+        model(nn.Layer): A semantic segmentation model.
         eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
         eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
         aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
         aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
         scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
         scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
@@ -52,6 +54,8 @@ def evaluate(model,
             It should be provided when `is_slide` is True.
             It should be provided when `is_slide` is True.
         crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
         crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
             It should be provided when `is_slide` is True.
             It should be provided when `is_slide` is True.
+        precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
+        amp_level (str, optional): Auto mixed precision level. Accepted values are "O1" and "O2". O1 represents mixed precision: the input data type of each operator is cast according to white_list and black_list. O2 represents pure fp16: all operator parameters and input data are cast to fp16, except for operators in black_list, operators without an fp16 kernel, and batchnorm. Default: "O1".
         num_workers (int, optional): Num workers for data loader. Default: 0.
         num_workers (int, optional): Num workers for data loader. Default: 0.
         print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
         print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
         auc_roc (bool, optional): Whether to add the auc_roc metric. Default: False.
         auc_roc (bool, optional): Whether to add the auc_roc metric. Default: False.
@@ -93,32 +97,66 @@ def evaluate(model,
     batch_cost_averager = TimeAverager()
     batch_cost_averager = TimeAverager()
     batch_start = time.time()
     batch_start = time.time()
     with paddle.no_grad():
     with paddle.no_grad():
-        for iter, (im, label) in enumerate(loader):
+        for iter, data in enumerate(loader):
             reader_cost_averager.record(time.time() - batch_start)
             reader_cost_averager.record(time.time() - batch_start)
-            label = label.astype('int64')
+            label = data['label'].astype('int64')
 
 
-            ori_shape = label.shape[-2:]
             if aug_eval:
             if aug_eval:
-                pred, logits = infer.aug_inference(
-                    model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=eval_dataset.transforms.transforms,
-                    scales=scales,
-                    flip_horizontal=flip_horizontal,
-                    flip_vertical=flip_vertical,
-                    is_slide=is_slide,
-                    stride=stride,
-                    crop_size=crop_size)
+                if precision == 'fp16':
+                    with paddle.amp.auto_cast(
+                            level=amp_level,
+                            enable=True,
+                            custom_white_list={
+                                "elementwise_add", "batch_norm",
+                                "sync_batch_norm"
+                            },
+                            custom_black_list={'bilinear_interp_v2'}):
+                        pred, logits = infer.aug_inference(
+                            model,
+                            data['img'],
+                            trans_info=data['trans_info'],
+                            scales=scales,
+                            flip_horizontal=flip_horizontal,
+                            flip_vertical=flip_vertical,
+                            is_slide=is_slide,
+                            stride=stride,
+                            crop_size=crop_size)
+                else:
+                    pred, logits = infer.aug_inference(
+                        model,
+                        data['img'],
+                        trans_info=data['trans_info'],
+                        scales=scales,
+                        flip_horizontal=flip_horizontal,
+                        flip_vertical=flip_vertical,
+                        is_slide=is_slide,
+                        stride=stride,
+                        crop_size=crop_size)
             else:
             else:
-                pred, logits = infer.inference(
-                    model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=eval_dataset.transforms.transforms,
-                    is_slide=is_slide,
-                    stride=stride,
-                    crop_size=crop_size)
+                if precision == 'fp16':
+                    with paddle.amp.auto_cast(
+                            level=amp_level,
+                            enable=True,
+                            custom_white_list={
+                                "elementwise_add", "batch_norm",
+                                "sync_batch_norm"
+                            },
+                            custom_black_list={'bilinear_interp_v2'}):
+                        pred, logits = infer.inference(
+                            model,
+                            data['img'],
+                            trans_info=data['trans_info'],
+                            is_slide=is_slide,
+                            stride=stride,
+                            crop_size=crop_size)
+                else:
+                    pred, logits = infer.inference(
+                        model,
+                        data['img'],
+                        trans_info=data['trans_info'],
+                        is_slide=is_slide,
+                        stride=stride,
+                        crop_size=crop_size)
 
 
             intersect_area, pred_area, label_area = metrics.calculate_area(
             intersect_area, pred_area, label_area = metrics.calculate_area(
                 pred,
                 pred,
@@ -175,12 +213,12 @@ def evaluate(model,
             batch_cost_averager.reset()
             batch_cost_averager.reset()
             batch_start = time.time()
             batch_start = time.time()
 
 
-    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
-                                       label_area_all)
-    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
-    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
-    class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
-                                     label_area_all)
+    metrics_input = (intersect_area_all, pred_area_all, label_area_all)
+    class_iou, miou = metrics.mean_iou(*metrics_input)
+    acc, class_precision, class_recall = metrics.class_measurement(
+        *metrics_input)
+    kappa = metrics.kappa(*metrics_input)
+    class_dice, mdice = metrics.dice(*metrics_input)
 
 
     if auc_roc:
     if auc_roc:
         auc_roc = metrics.auc_roc(
         auc_roc = metrics.auc_roc(
@@ -193,5 +231,7 @@ def evaluate(model,
         infor = infor + auc_infor if auc_roc else infor
         infor = infor + auc_infor if auc_roc else infor
         logger.info(infor)
         logger.info(infor)
         logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
         logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
-        logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
-    return miou, acc, class_iou, class_acc, kappa
+        logger.info("[EVAL] Class Precision: \n" + str(
+            np.round(class_precision, 4)))
+        logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
+    return miou, acc, class_iou, class_precision, kappa

+ 176 - 29
paddlers/models/ppseg/cvlibs/config.py

@@ -15,9 +15,12 @@
 import codecs
 import codecs
 import os
 import os
 from typing import Any, Dict, Generic
 from typing import Any, Dict, Generic
+import warnings
+from ast import literal_eval
 
 
 import paddle
 import paddle
 import yaml
 import yaml
+import six
 
 
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg.utils import logger
 from paddlers.models.ppseg.utils import logger
@@ -69,7 +72,8 @@ class Config(object):
                  path: str,
                  path: str,
                  learning_rate: float=None,
                  learning_rate: float=None,
                  batch_size: int=None,
                  batch_size: int=None,
-                 iters: int=None):
+                 iters: int=None,
+                 opts: list=None):
         if not path:
         if not path:
             raise ValueError('Please specify the configuration file path.')
             raise ValueError('Please specify the configuration file path.')
 
 
@@ -84,7 +88,18 @@ class Config(object):
             raise RuntimeError('Config file should in yaml format!')
             raise RuntimeError('Config file should in yaml format!')
 
 
         self.update(
         self.update(
-            learning_rate=learning_rate, batch_size=batch_size, iters=iters)
+            learning_rate=learning_rate,
+            batch_size=batch_size,
+            iters=iters,
+            opts=opts)
+
+        model_cfg = self.dic.get('model', None)
+        if model_cfg is None:
+            raise RuntimeError('No model specified in the configuration file.')
+        if (not self.train_dataset_config) and (not self.val_dataset_config):
+            raise ValueError(
+                'One of `train_dataset` or `val_dataset should be given, but there are none.'
+            )
 
 
     def _update_dic(self, dic, base_dic):
     def _update_dic(self, dic, base_dic):
         """
         """
@@ -121,7 +136,8 @@ class Config(object):
     def update(self,
     def update(self,
                learning_rate: float=None,
                learning_rate: float=None,
                batch_size: int=None,
                batch_size: int=None,
-               iters: int=None):
+               iters: int=None,
+               opts: list=None):
         '''Update config'''
         '''Update config'''
         if learning_rate:
         if learning_rate:
             if 'lr_scheduler' in self.dic:
             if 'lr_scheduler' in self.dic:
@@ -135,6 +151,27 @@ class Config(object):
         if iters:
         if iters:
             self.dic['iters'] = iters
             self.dic['iters'] = iters
 
 
+        # Override config entries with the key/value pairs passed via the command-line `--opts` option.
+        if opts is not None:
+            if len(opts) % 2 != 0 or len(opts) == 0:
+                raise ValueError(
+                    "Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
+                    format(opts))
+            for key, value in zip(opts[0::2], opts[1::2]):
+                if isinstance(value, six.string_types):
+                    try:
+                        value = literal_eval(value)
+                    except ValueError:
+                        pass
+                    except SyntaxError:
+                        pass
+                key_list = key.split('.')
+                dic = self.dic
+                for subkey in key_list[:-1]:
+                    dic.setdefault(subkey, dict())
+                    dic = dic[subkey]
+                dic[key_list[-1]] = value
+
     @property
     @property
     def batch_size(self) -> int:
     def batch_size(self) -> int:
         return self.dic.get('batch_size', 1)
         return self.dic.get('batch_size', 1)
@@ -153,13 +190,32 @@ class Config(object):
                 'No `lr_scheduler` specified in the configuration file.')
                 'No `lr_scheduler` specified in the configuration file.')
         params = self.dic.get('lr_scheduler')
         params = self.dic.get('lr_scheduler')
 
 
+        use_warmup = False
+        if 'warmup_iters' in params:
+            use_warmup = True
+            warmup_iters = params.pop('warmup_iters')
+            assert 'warmup_start_lr' in params, \
+                "When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
+            warmup_start_lr = params.pop('warmup_start_lr')
+            end_lr = params['learning_rate']
+
         lr_type = params.pop('type')
         lr_type = params.pop('type')
         if lr_type == 'PolynomialDecay':
         if lr_type == 'PolynomialDecay':
-            params.setdefault('decay_steps', self.iters)
+            iters = self.iters - warmup_iters if use_warmup else self.iters
+            iters = max(iters, 1)
+            params.setdefault('decay_steps', iters)
             params.setdefault('end_lr', 0)
             params.setdefault('end_lr', 0)
             params.setdefault('power', 0.9)
             params.setdefault('power', 0.9)
+        lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
+
+        if use_warmup:
+            lr_sche = paddle.optimizer.lr.LinearWarmup(
+                learning_rate=lr_sche,
+                warmup_steps=warmup_iters,
+                start_lr=warmup_start_lr,
+                end_lr=end_lr)
 
 
-        return getattr(paddle.optimizer.lr, lr_type)(**params)
+        return lr_sche
 
 
     @property
     @property
     def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
     def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
@@ -202,15 +258,33 @@ class Config(object):
         args = self.optimizer_args
         args = self.optimizer_args
         optimizer_type = args.pop('type')
         optimizer_type = args.pop('type')
 
 
+        params = self.model.parameters()
+        if 'backbone_lr_mult' in args:
+            if not hasattr(self.model, 'backbone'):
+                logger.warning('The backbone_lr_mult is not effective because'
+                               ' the model does not have backbone')
+            else:
+                backbone_lr_mult = args.pop('backbone_lr_mult')
+                backbone_params = self.model.backbone.parameters()
+                backbone_params_id = [id(x) for x in backbone_params]
+                other_params = [
+                    x for x in params if id(x) not in backbone_params_id
+                ]
+                params = [{
+                    'params': backbone_params,
+                    'learning_rate': backbone_lr_mult
+                }, {
+                    'params': other_params
+                }]
+
         if optimizer_type == 'sgd':
         if optimizer_type == 'sgd':
-            return paddle.optimizer.Momentum(
-                lr, parameters=self.model.parameters(), **args)
+            return paddle.optimizer.Momentum(lr, parameters=params, **args)
         elif optimizer_type == 'adam':
         elif optimizer_type == 'adam':
-            return paddle.optimizer.Adam(
-                lr, parameters=self.model.parameters(), **args)
+            return paddle.optimizer.Adam(lr, parameters=params, **args)
         elif optimizer_type in paddle.optimizer.__all__:
         elif optimizer_type in paddle.optimizer.__all__:
-            return getattr(paddle.optimizer, optimizer_type)(
-                lr, parameters=self.model.parameters(), **args)
+            return getattr(paddle.optimizer, optimizer_type)(lr,
+                                                             parameters=params,
+                                                             **args)
 
 
         raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
         raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
 
 
@@ -295,24 +369,6 @@ class Config(object):
     @property
     @property
     def model(self) -> paddle.nn.Layer:
     def model(self) -> paddle.nn.Layer:
         model_cfg = self.dic.get('model').copy()
         model_cfg = self.dic.get('model').copy()
-        if not model_cfg:
-            raise RuntimeError('No model specified in the configuration file.')
-        if not 'num_classes' in model_cfg:
-            num_classes = None
-            if self.train_dataset_config:
-                if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
-                    num_classes = self.train_dataset_class.NUM_CLASSES
-                elif hasattr(self.train_dataset, 'num_classes'):
-                    num_classes = self.train_dataset.num_classes
-            elif self.val_dataset_config:
-                if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
-                    num_classes = self.val_dataset_class.NUM_CLASSES
-                elif hasattr(self.val_dataset, 'num_classes'):
-                    num_classes = self.val_dataset.num_classes
-
-            if num_classes is not None:
-                model_cfg['num_classes'] = num_classes
-
         if not self._model:
         if not self._model:
             self._model = self._load_object(model_cfg)
             self._model = self._load_object(model_cfg)
         return self._model
         return self._model
@@ -401,3 +457,94 @@ class Config(object):
 
 
     def __str__(self) -> str:
     def __str__(self) -> str:
         return yaml.dump(self.dic)
         return yaml.dump(self.dic)
+
+    @property
+    def val_transforms(self) -> list:
+        """Get val_transform from val_dataset"""
+        _val_dataset = self.val_dataset_config
+        if not _val_dataset:
+            return []
+        _transforms = _val_dataset.get('transforms', [])
+        transforms = []
+        for i in _transforms:
+            transforms.append(self._load_object(i))
+        return transforms
+
+    def check_sync_info(self) -> None:
+        """
+        Check and sync the info, such as num_classes and img_channels, 
+        between the config of model, train_dataset and val_dataset.
+        """
+        self._check_sync_num_classes()
+        self._check_sync_img_channels()
+
+    def _check_sync_num_classes(self):
+        num_classes_set = set()
+
+        if self.dic['model'].get('num_classes', None) is not None:
+            num_classes_set.add(self.dic['model'].get('num_classes'))
+        if self.train_dataset_config:
+            if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
+                num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
+            elif 'num_classes' in self.train_dataset_config:
+                num_classes_set.add(self.train_dataset_config['num_classes'])
+        if self.val_dataset_config:
+            if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
+                num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
+            elif 'num_classes' in self.val_dataset_config:
+                num_classes_set.add(self.val_dataset_config['num_classes'])
+
+        if len(num_classes_set) == 0:
+            raise ValueError(
+                '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
+            )
+        elif len(num_classes_set) > 1:
+            raise ValueError(
+                '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
+                .format(num_classes_set))
+
+        num_classes = num_classes_set.pop()
+        self.dic['model']['num_classes'] = num_classes
+        if self.train_dataset_config and \
+            (not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
+            self.dic['train_dataset']['num_classes'] = num_classes
+        if self.val_dataset_config and \
+            (not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
+            self.dic['val_dataset']['num_classes'] = num_classes
+
+    def _check_sync_img_channels(self):
+        img_channels_set = set()
+        model_cfg = self.dic['model']
+
+        # If the model has a backbone, in_channels is an input parameter of the backbone.
+        # Otherwise, in_channels is an input parameter of the model itself.
+        if 'backbone' in model_cfg:
+            x = model_cfg['backbone'].get('in_channels', None)
+            if x is not None:
+                img_channels_set.add(x)
+        elif model_cfg.get('in_channels', None) is not None:
+            img_channels_set.add(model_cfg.get('in_channels'))
+        if self.train_dataset_config and \
+            ('img_channels' in self.train_dataset_config):
+            img_channels_set.add(self.train_dataset_config['img_channels'])
+        if self.val_dataset_config and \
+            ('img_channels' in self.val_dataset_config):
+            img_channels_set.add(self.val_dataset_config['img_channels'])
+
+        if len(img_channels_set) > 1:
+            raise ValueError(
+                '`img_channels` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
+                .format(img_channels_set))
+
+        img_channels = 3 if len(img_channels_set) == 0 \
+            else img_channels_set.pop()
+        if 'backbone' in model_cfg:
+            self.dic['model']['backbone']['in_channels'] = img_channels
+        else:
+            self.dic['model']['in_channels'] = img_channels
+        if self.train_dataset_config and \
+            self.train_dataset_config['type'] == "Dataset":
+            self.dic['train_dataset']['img_channels'] = img_channels
+        if self.val_dataset_config and \
+            self.val_dataset_config['type'] == "Dataset":
+            self.dic['val_dataset']['img_channels'] = img_channels

+ 26 - 0
paddlers/models/ppseg/cvlibs/param_init.py

@@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
 
 
     initializer = nn.initializer.KaimingUniform(**kwargs)
     initializer = nn.initializer.KaimingUniform(**kwargs)
     initializer(param, param.block)
     initializer(param, param.block)
+
+
+def xavier_uniform(param, **kwargs):
+    r"""
+    Apply the Xavier (Glorot) uniform initializer to `param`.
+
+    The scheme comes from `Understanding the difficulty of training deep
+    feedforward neural networks
+    <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_ by Xavier
+    Glorot and Yoshua Bengio, and aims to keep the gradient scale roughly
+    equal across layers. Values are drawn uniformly from [-x, x], where
+    .. math::
+        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
+
+    Args:
+        param (Tensor): Tensor that needs to be initialized.
+        **kwargs: Forwarded unchanged to `nn.initializer.XavierUniform`.
+
+    Examples:
+
+        from paddlers.models.ppseg.cvlibs import param_init
+        import paddle.nn as nn
+
+        linear = nn.Linear(2, 4)
+        param_init.xavier_uniform(linear.weight)
+    """
+    nn.initializer.XavierUniform(**kwargs)(param, param.block)

+ 1 - 0
paddlers/models/ppseg/datasets/__init__.py

@@ -27,3 +27,4 @@ from .drive import DRIVE
 from .hrf import HRF
 from .hrf import HRF
 from .chase_db1 import CHASEDB1
 from .chase_db1 import CHASEDB1
 from .pp_humanseg14k import PPHumanSeg14K
 from .pp_humanseg14k import PPHumanSeg14K
+from .pssl import PSSLDataset

+ 16 - 8
paddlers/models/ppseg/datasets/ade.py

@@ -89,23 +89,31 @@ class ADE20K(Dataset):
             self.file_list.append([img_path, label_path])
             self.file_list.append([img_path, label_path])
 
 
     def __getitem__(self, idx):
     def __getitem__(self, idx):
+        data = {}
+        data['trans_info'] = []
         image_path, label_path = self.file_list[idx]
         image_path, label_path = self.file_list[idx]
+        data['img'] = image_path
+        data['gt_fields'] = [
+        ]  # If key in gt_fields, the data[key] have transforms synchronous.
+
         if self.mode == 'val':
         if self.mode == 'val':
-            im, _ = self.transforms(im=image_path)
+            data = self.transforms(data)
             label = np.asarray(Image.open(label_path))
             label = np.asarray(Image.open(label_path))
             # The class 0 is ignored. And it will equal to 255 after
             # The class 0 is ignored. And it will equal to 255 after
             # subtracted 1, because the dtype of label is uint8.
             # subtracted 1, because the dtype of label is uint8.
             label = label - 1
             label = label - 1
             label = label[np.newaxis, :, :]
             label = label[np.newaxis, :, :]
-            return im, label
+            data['label'] = label
+            return data
         else:
         else:
-            im, label = self.transforms(im=image_path, label=label_path)
-            label = label - 1
+            data['label'] = label_path
+            data['gt_fields'].append('label')
+            data = self.transforms(data)
+            data['label'] = data['label'] - 1
             # Recover the ignore pixels adding by transform
             # Recover the ignore pixels adding by transform
-            label[label == 254] = 255
+            data['label'][data['label'] == 254] = 255
             if self.edge:
             if self.edge:
                 edge_mask = F.mask_to_binary_edge(
                 edge_mask = F.mask_to_binary_edge(
                     label, radius=2, num_classes=self.num_classes)
                     label, radius=2, num_classes=self.num_classes)
-                return im, label, edge_mask
-            else:
-                return im, label
+                data['edge'] = edge_mask
+            return data

+ 31 - 23
paddlers/models/ppseg/datasets/dataset.py

@@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
 
 
         Examples:
         Examples:
 
 
-            import paddlers.models.ppseg.transforms as T
+            import paddlers.models.ppseg.transforms as T
             from paddlers.models.ppseg.datasets import Dataset
             from paddlers.models.ppseg.datasets import Dataset
 
 
-            transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
+            transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
             dataset_root = 'dataset_root_path'
             dataset_root = 'dataset_root_path'
             train_path = 'train_path'
             train_path = 'train_path'
             num_classes = 2
             num_classes = 2
@@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 transforms,
+                 mode,
                  dataset_root,
                  dataset_root,
+                 transforms,
                  num_classes,
                  num_classes,
-                 mode='train',
+                 img_channels=3,
                  train_path=None,
                  train_path=None,
                  val_path=None,
                  val_path=None,
                  test_path=None,
                  test_path=None,
@@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
                  ignore_index=255,
                  ignore_index=255,
                  edge=False):
                  edge=False):
         self.dataset_root = dataset_root
         self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
+        self.transforms = Compose(transforms, img_channels=img_channels)
         self.file_list = list()
         self.file_list = list()
         self.mode = mode.lower()
         self.mode = mode.lower()
         self.num_classes = num_classes
         self.num_classes = num_classes
+        self.img_channels = img_channels
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
         self.edge = edge
         self.edge = edge
 
 
@@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
             raise ValueError(
             raise ValueError(
                 "mode should be 'train', 'val' or 'test', but got {}.".format(
                 "mode should be 'train', 'val' or 'test', but got {}.".format(
                     self.mode))
                     self.mode))
-
-        if self.transforms is None:
-            raise ValueError("`transforms` is necessary, but it is None.")
-
         if not os.path.exists(self.dataset_root):
         if not os.path.exists(self.dataset_root):
             raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
             raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
                 self.dataset_root))
                 self.dataset_root))
+        if self.transforms is None:
+            raise ValueError("`transforms` is necessary, but it is None.")
+        if num_classes < 1:
+            raise ValueError(
+                "`num_classes` should be greater than 1, but got {}".format(
+                    num_classes))
+        if img_channels not in [1, 3]:
+            raise ValueError("`img_channels` should in [1, 3], but got {}".
+                             format(img_channels))
 
 
         if self.mode == 'train':
         if self.mode == 'train':
             if train_path is None:
             if train_path is None:
@@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
                 self.file_list.append([image_path, label_path])
                 self.file_list.append([image_path, label_path])
 
 
     def __getitem__(self, idx):
     def __getitem__(self, idx):
+        data = {}
+        data['trans_info'] = []
         image_path, label_path = self.file_list[idx]
         image_path, label_path = self.file_list[idx]
-        if self.mode == 'test':
-            im, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            return im, image_path
-        elif self.mode == 'val':
-            im, _ = self.transforms(im=image_path)
-            label = np.asarray(Image.open(label_path))
-            label = label[np.newaxis, :, :]
-            return im, label
+        data['img'] = image_path
+        data['label'] = label_path
+        # If a key is listed in gt_fields, data[key] is transformed synchronously.
+        data['gt_fields'] = []
+        if self.mode == 'val':
+            data = self.transforms(data)
+            data['label'] = data['label'][np.newaxis, :, :]
+
         else:
         else:
-            im, label = self.transforms(im=image_path, label=label_path)
+            data['gt_fields'].append('label')
+            data = self.transforms(data)
             if self.edge:
             if self.edge:
                 edge_mask = F.mask_to_binary_edge(
                 edge_mask = F.mask_to_binary_edge(
-                    label, radius=2, num_classes=self.num_classes)
-                return im, label, edge_mask
-            else:
-                return im, label
+                    data['label'], radius=2, num_classes=self.num_classes)
+                data['edge'] = edge_mask
+        return data
 
 
     def __len__(self):
     def __len__(self):
         return len(self.file_list)
         return len(self.file_list)

+ 135 - 0
paddlers/models/ppseg/datasets/pssl.py

@@ -0,0 +1,135 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+
+from paddlers.models.ppseg.datasets import Dataset
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class PSSLDataset(Dataset):
+    """
+    The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
+    is computed by the Consensus explanation algorithm.
+
+    The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation
+    Models" (https://arxiv.org/abs/2207.03335).
+
+    The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification
+    Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).
+
+    To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
+    as follows:
+
+        imagenet_root
+        |
+        |--train
+        |  |--n01440764
+        |  |  |--n01440764_10026.JPEG
+        |  |  |--...
+        |  |--nxxxxxxxx
+        |  |--...
+
+    where only the "train" set is needed.
+
+    The PSSL dataset has the folder structure as follows:
+
+        pssl_root
+        |
+        |--train
+        |  |--n01440764
+        |  |  |--n01440764_10026.JPEG_eiseg.npz
+        |  |  |--...
+        |  |--nxxxxxxxx
+        |  |--...
+        |
+        |--imagenet_lsvrc_2015_synsets.txt
+        |--train.txt
+
+    where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.
+
+    Args:
+        transforms (list): Transforms for image.
+        imagenet_root (str): The path to the original ImageNet dataset.
+        pssl_root (str): The path to the PSSL dataset.
+        mode (str, optional): Which part of dataset to use. Only 'train' is supported. Default: 'train'.
+        edge (bool, optional): Whether to compute edge while training. Default: False.
+
+    Raises:
+        ValueError: If `mode` is not 'train', `transforms` is None, either root directory is missing,
+            or "train.txt" / "imagenet_lsvrc_2015_synsets.txt" cannot be found under `pssl_root`.
+    """
+    ignore_index = 1001  # 0~999 is target class, 1000 is bg
+    NUM_CLASSES = 1001  # consider target class and bg
+
+    def __init__(self,
+                 transforms,
+                 imagenet_root,
+                 pssl_root,
+                 mode='train',
+                 edge=False):
+        mode = mode.lower()
+        if mode not in ['train']:
+            raise ValueError("mode should be 'train', but got {}.".format(mode))
+        if transforms is None:
+            raise ValueError("`transforms` is necessary, but it is None.")
+
+        self.transforms = Compose(transforms)
+        self.mode = mode
+        self.edge = edge
+
+        self.num_classes = self.NUM_CLASSES
+        self.ignore_index = self.num_classes  # 1001
+        self.file_list = []
+        self.class_id_dict = {}
+
+        # Both roots must be given and must be existing directories. Checking
+        # for None first avoids a TypeError from os.path.isdir(None).
+        if imagenet_root is None or pssl_root is None \
+            or not os.path.isdir(imagenet_root) \
+            or not os.path.isdir(pssl_root):
+            raise ValueError(
+                "The dataset is not Found or the folder structure is nonconfoumance."
+            )
+
+        train_list_file = os.path.join(pssl_root, "train.txt")
+        if not os.path.exists(train_list_file):
+            raise ValueError("Train list file isn't exists.")
+        # Use a context manager so the list file is closed deterministically.
+        with open(train_list_file) as f:
+            for line in f:
+                # line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
+                label_path = line.strip()
+                if not label_path:
+                    # Skip blank lines instead of registering a bogus sample.
+                    continue
+                # The image shares the label's relative path up to ".JPEG".
+                img_path = label_path.split('.JPEG')[0] + '.JPEG'
+                label_path = os.path.join(pssl_root, label_path)
+                img_path = os.path.join(imagenet_root, img_path)
+                self.file_list.append([img_path, label_path])
+
+        # mapping class name to class id.
+        class_id_file = os.path.join(pssl_root,
+                                     "imagenet_lsvrc_2015_synsets.txt")
+        if not os.path.exists(class_id_file):
+            raise ValueError("Class id file isn't exists.")
+        with open(class_id_file) as f:
+            # The class id is the zero-based line number of the synset name.
+            for idx, line in enumerate(f):
+                class_name = line.strip()
+                self.class_id_dict[class_name] = idx
+
+    def __getitem__(self, idx):
+        image_path, label_path = self.file_list[idx]
+
+        # transform label
+        # The class name is the file-name prefix before the first underscore,
+        # e.g. "n01440764" for "n01440764_10026.JPEG".
+        class_name = (image_path.split('/')[-1]).split('_')[0]
+        class_id = self.class_id_dict[class_name]
+
+        pssl_seg = np.load(label_path)['arr_0']
+        # Start with every pixel as background (1000), then mark the pixels
+        # where the pseudo label equals 1 with the image's class id.
+        gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
+        # [0, 999] for imagenet classes, 1000 for background.
+        # NOTE(review): the previous comment said other values (-1) are ignored
+        # during training, but ignore_index is 1001 here - confirm.
+        gt_semantic_seg[pssl_seg == 1] = class_id
+
+        # NOTE(review): sibling datasets in this change call
+        # `self.transforms(data)` with a dict and return the dict; this
+        # tuple-style call is kept as-is - confirm it still matches Compose.
+        im, label = self.transforms(im=image_path, label=gt_semantic_seg)
+
+        return im, label

+ 9 - 0
paddlers/models/ppseg/models/__init__.py

@@ -49,9 +49,18 @@ from .segnet import SegNet
 from .encnet import ENCNet
 from .encnet import ENCNet
 from .hrnet_contrast import HRNetW48Contrast
 from .hrnet_contrast import HRNetW48Contrast
 from .espnet import ESPNetV2
 from .espnet import ESPNetV2
+from .pp_liteseg import PPLiteSeg
 from .dmnet import DMNet
 from .dmnet import DMNet
 from .espnetv1 import ESPNetV1
 from .espnetv1 import ESPNetV1
 from .enet import ENet
 from .enet import ENet
 from .bisenetv1 import BiseNetV1
 from .bisenetv1 import BiseNetV1
 from .fastfcn import FastFCN
 from .fastfcn import FastFCN
 from .pfpnnet import PFPNNet
 from .pfpnnet import PFPNNet
+from .glore import GloRe
+from .ddrnet import DDRNet_23
+from .ccnet import CCNet
+from .mobileseg import MobileSeg
+from .upernet import UPerNet
+from .sinet import SINet
+from .lraspp import LRASPP
+from .topformer import TopFormer

+ 3 - 3
paddlers/models/ppseg/models/attention_unet.py

@@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
     """
     """
 
 
-    def __init__(self, num_classes, pretrained=None):
+    def __init__(self, num_classes, in_channels=3, pretrained=None):
         super().__init__()
         super().__init__()
-        n_channels = 3
-        self.encoder = Encoder(n_channels, [64, 128, 256, 512])
+        self.encoder = Encoder(in_channels, [64, 128, 256, 512])
         filters = np.array([64, 128, 256, 512, 1024])
         filters = np.array([64, 128, 256, 512, 1024])
         self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
         self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
         self.att5 = AttentionBlock(
         self.att5 = AttentionBlock(

+ 4 - 0
paddlers/models/ppseg/models/backbones/__init__.py

@@ -21,3 +21,7 @@ from .swin_transformer import *
 from .mobilenetv2 import *
 from .mobilenetv2 import *
 from .mix_transformer import *
 from .mix_transformer import *
 from .stdcnet import *
 from .stdcnet import *
+from .lite_hrnet import *
+from .shufflenetv2 import *
+from .ghostnet import *
+from .top_transformer import *

+ 318 - 0
paddlers/models/ppseg/models/backbones/ghostnet.py

@@ -0,0 +1,318 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
+
+import math
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Uniform, KaimingNormal
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.utils import utils, logger
+
+__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    A bias-free `Conv2D` followed by `BatchNorm` with an optional activation.
+
+    Args:
+        in_channels (int): Number of input channels of the convolution.
+        out_channels (int): Number of output channels of the convolution.
+        kernel_size (int): Kernel size; also used to derive the padding.
+        stride (int, optional): Convolution stride. Default: 1.
+        groups (int, optional): Number of convolution groups. Default: 1.
+        act (str, optional): Activation passed to `BatchNorm`. Default: "relu".
+        name (str): Prefix for all parameter names. Although the default is
+            None, a string is required because it is concatenated with
+            suffixes such as "_weights" and "_bn".
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            # (k - 1) // 2 pads symmetrically; for an odd kernel at stride 1
+            # this keeps the spatial size unchanged.
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+        bn_name = name + "_bn"
+
+        # The BN scale and offset use an L2Decay coefficient of 0.0.
+        self._batch_norm = BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale", regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset", regularizer=L2Decay(0.0)),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")
+
+    def forward(self, inputs):
+        """Apply the convolution, then batch norm (and its activation)."""
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class SEBlock(nn.Layer):
+    """
+    Channel gating block: global average pool -> Linear -> ReLU -> Linear ->
+    clip to [0, 1] -> multiply with the input.
+
+    Args:
+        num_channels (int): Number of channels of the input feature map.
+        reduction_ratio (int, optional): The hidden size of the two Linear
+            layers is `num_channels // reduction_ratio`. Default: 4.
+        name (str): Prefix for parameter names. Although the default is None,
+            a string is required because it is concatenated with suffixes.
+    """
+
+    def __init__(self, num_channels, reduction_ratio=4, name=None):
+        super(SEBlock, self).__init__()
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+        self._num_channels = num_channels
+        # Uniform init bound is 1 / sqrt(input size of the Linear layer).
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        med_ch = num_channels // reduction_ratio
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
+            bias_attr=ParamAttr(name=name + "_1_offset"))
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_channels,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
+            bias_attr=ParamAttr(name=name + "_2_offset"))
+
+    def forward(self, inputs):
+        """Return `inputs` scaled by the per-channel gate computed from it."""
+        pool = self.pool2d_gap(inputs)
+        # Drop the two pooled axes so the Linear layers see one vector per
+        # sample (assumes a 4-D input with spatial axes 2 and 3 - TODO confirm).
+        pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = F.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        # The gate is bounded by clipping to [0, 1].
+        excitation = paddle.clip(x=excitation, min=0, max=1)
+        # Restore axes 2 and 3 so the gate can be multiplied with `inputs`.
+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = paddle.multiply(inputs, excitation)
+        return out
+
+
+class GhostModule(nn.Layer):
+    """
+    Ghost module: a primary convolution produces part of the output features
+    and a grouped "cheap" convolution derives the rest from them; both are
+    concatenated along axis 1.
+
+    The output has `ceil(output_channels / ratio) * ratio` channels, which
+    equals `output_channels` only when it is divisible by `ratio`; nothing is
+    trimmed after the concatenation.
+
+    Args:
+        in_channels (int): Number of input channels.
+        output_channels (int): Requested number of output channels.
+        kernel_size (int, optional): Kernel size of the primary conv. Default: 1.
+        ratio (int, optional): Ratio between requested output channels and
+            primary channels. Default: 2.
+        dw_size (int, optional): Kernel size of the cheap operation. Default: 3.
+        stride (int, optional): Stride of the primary conv. Default: 1.
+        relu (bool, optional): Use "relu" activation in both convs when True,
+            no activation otherwise. Default: True.
+        name (str): Prefix for parameter names. Although the default is None,
+            a string is required because it is concatenated with suffixes.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 output_channels,
+                 kernel_size=1,
+                 ratio=2,
+                 dw_size=3,
+                 stride=1,
+                 relu=True,
+                 name=None):
+        super(GhostModule, self).__init__()
+        # Channels produced by the primary conv and by the cheap operation.
+        init_channels = int(math.ceil(output_channels / ratio))
+        new_channels = int(init_channels * (ratio - 1))
+        self.primary_conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=init_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            groups=1,
+            act="relu" if relu else None,
+            name=name + "_primary_conv")
+        # groups == in_channels: one group per primary feature channel.
+        self.cheap_operation = ConvBNLayer(
+            in_channels=init_channels,
+            out_channels=new_channels,
+            kernel_size=dw_size,
+            stride=1,
+            groups=init_channels,
+            act="relu" if relu else None,
+            name=name + "_cheap_operation")
+
+    def forward(self, inputs):
+        """Return the primary and cheap features concatenated along axis 1."""
+        x = self.primary_conv(inputs)
+        y = self.cheap_operation(x)
+        out = paddle.concat([x, y], axis=1)
+        return out
+
+
+class GhostBottleneck(nn.Layer):
+    """
+    Residual bottleneck built from two `GhostModule`s, with an optional
+    strided grouped conv and an optional `SEBlock` in between.
+
+    Args:
+        in_channels (int): Number of input channels.
+        hidden_dim (int): Output channels requested from the first ghost module.
+        output_channels (int): Output channels requested from the second
+            ghost module and produced by the shortcut projection.
+        kernel_size (int): Kernel size of the grouped convs (main and shortcut).
+        stride (int): The main-path strided conv is only built for stride 2;
+            the shortcut projection is built whenever stride != 1 or the
+            channel counts differ, so 1 and 2 are the consistent values.
+        use_se (bool | int): Truthy to insert an `SEBlock`.
+        name (str): Prefix for parameter names. Although the default is None,
+            a string is required because it is concatenated with suffixes.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 hidden_dim,
+                 output_channels,
+                 kernel_size,
+                 stride,
+                 use_se,
+                 name=None):
+        super(GhostBottleneck, self).__init__()
+        self._stride = stride
+        self._use_se = use_se
+        self._num_channels = in_channels
+        self._output_channels = output_channels
+        self.ghost_module_1 = GhostModule(
+            in_channels=in_channels,
+            output_channels=hidden_dim,
+            kernel_size=1,
+            stride=1,
+            relu=True,
+            name=name + "_ghost_module_1")
+        if stride == 2:
+            self.depthwise_conv = ConvBNLayer(
+                in_channels=hidden_dim,
+                out_channels=hidden_dim,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=hidden_dim,
+                act=None,
+                name=name +
+                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+        if use_se:
+            self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
+        self.ghost_module_2 = GhostModule(
+            in_channels=hidden_dim,
+            output_channels=output_channels,
+            kernel_size=1,
+            relu=False,
+            name=name + "_ghost_module_2")
+        # A projection shortcut is needed whenever the identity would not
+        # match the main path in stride or channel count.
+        if stride != 1 or in_channels != output_channels:
+            self.shortcut_depthwise = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=in_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=in_channels,
+                act=None,
+                name=name +
+                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+            self.shortcut_conv = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=output_channels,
+                kernel_size=1,
+                stride=1,
+                groups=1,
+                act=None,
+                name=name + "_shortcut_conv")
+
+    def forward(self, inputs):
+        """Return the main-path output added to the (projected) shortcut."""
+        x = self.ghost_module_1(inputs)
+        if self._stride == 2:
+            x = self.depthwise_conv(x)
+        if self._use_se:
+            x = self.se_block(x)
+        x = self.ghost_module_2(x)
+        # Use the identity shortcut only when no projection layers were built.
+        if self._stride == 1 and self._num_channels == self._output_channels:
+            shortcut = inputs
+        else:
+            shortcut = self.shortcut_depthwise(inputs)
+            shortcut = self.shortcut_conv(shortcut)
+        return paddle.add(x=x, y=shortcut)
+
+
+class GhostNet(nn.Layer):
+    """
+    GhostNet backbone that returns intermediate feature maps.
+
+    Args:
+        scale (float): Multiplier applied to every channel count in `cfgs`
+            (and to the stem's 16 channels) before rounding.
+        in_channels (int, optional): Channels of the input image. Default: 3.
+        pretrained (str, optional): If not None, passed to
+            `utils.load_entire_model` to load weights. Default: None.
+
+    Attributes set here:
+        out_index: Indices of the bottlenecks whose outputs are returned.
+        feat_channels: Output channel count of each returned feature map.
+    """
+
+    def __init__(self, scale, in_channels=3, pretrained=None):
+        super(GhostNet, self).__init__()
+        # Columns: kernel size, expansion (hidden) size, output channels,
+        # SE flag, stride. The "# xN" marks come from the original author.
+        self.cfgs = [
+            # k, t, c, SE, s
+            [3, 16, 16, 0, 1],
+            [3, 48, 24, 0, 2],
+            [3, 72, 24, 0, 1],  # x4
+            [5, 72, 40, 1, 2],
+            [5, 120, 40, 1, 1],  # x8
+            [3, 240, 80, 0, 2],
+            [3, 200, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 480, 112, 1, 1],
+            [3, 672, 112, 1, 1],  # x16
+            [5, 672, 160, 1, 2],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1]  # x32
+        ]
+        self.scale = scale
+        self.pretrained = pretrained
+
+        output_channels = int(self._make_divisible(16 * self.scale, 4))
+        self.conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=output_channels,
+            kernel_size=3,
+            stride=2,
+            groups=1,
+            act="relu",
+            name="conv1")
+
+        # build inverted residual blocks
+        self.out_index = [2, 4, 10, 15]
+        self.feat_channels = []
+        # Plain Python list used for iteration in forward(); each block is
+        # registered as a sublayer through add_sublayer() below.
+        self.ghost_bottleneck_list = []
+        for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
+            # Each block consumes the previous block's output channels
+            # (this rebinds the `in_channels` argument, already used by conv1).
+            in_channels = output_channels
+            output_channels = int(self._make_divisible(c * self.scale, 4))
+            hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
+            ghost_bottleneck = self.add_sublayer(
+                name="_ghostbottleneck_" + str(idx),
+                sublayer=GhostBottleneck(
+                    in_channels=in_channels,
+                    hidden_dim=hidden_dim,
+                    output_channels=output_channels,
+                    kernel_size=k,
+                    stride=s,
+                    use_se=use_se,
+                    name="_ghostbottleneck_" + str(idx)))
+            self.ghost_bottleneck_list.append(ghost_bottleneck)
+            if idx in self.out_index:
+                self.feat_channels.append(output_channels)
+
+        self.init_weight()
+
+    def init_weight(self):
+        """Load pretrained weights when `self.pretrained` is not None."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, inputs):
+        """Return the list of bottleneck outputs at the indices in `out_index`."""
+        feat_list = []
+        x = self.conv1(inputs)
+        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
+            x = ghost_bottleneck(x)
+            if idx in self.out_index:
+                feat_list.append(x)
+        return feat_list
+
+    def _make_divisible(self, v, divisor, min_value=None):
+        """
+        This function is taken from the original tf repo.
+        It rounds `v` to the nearest multiple of `divisor` (this class calls
+        it with divisor=4), never going below `min_value`, which defaults to
+        `divisor`.
+        It can be seen here:
+        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+        """
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        # Make sure that round down does not go down by more than 10%.
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x0_5(**kwargs):
+    """Build a `GhostNet` with `scale=0.5`; `kwargs` are forwarded to it."""
+    return GhostNet(scale=0.5, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x1_0(**kwargs):
+    """Build a `GhostNet` with `scale=1.0`; `kwargs` are forwarded to it."""
+    return GhostNet(scale=1.0, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x1_3(**kwargs):
+    """Build a `GhostNet` with `scale=1.3`; `kwargs` are forwarded to it."""
+    return GhostNet(scale=1.3, **kwargs)

+ 3 - 1
paddlers/models/ppseg/models/backbones/hrnet.py

@@ -37,6 +37,7 @@ class HRNet(nn.Layer):
     (https://arxiv.org/pdf/1908.07919.pdf).
     (https://arxiv.org/pdf/1908.07919.pdf).
 
 
     Args:
     Args:
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path of pretrained model.
         pretrained (str, optional): The path of pretrained model.
         stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
         stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
         stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
         stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
@@ -56,6 +57,7 @@ class HRNet(nn.Layer):
     """
     """
 
 
     def __init__(self,
     def __init__(self,
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  stage1_num_modules=1,
                  stage1_num_modules=1,
                  stage1_num_blocks=(4, ),
                  stage1_num_blocks=(4, ),
@@ -91,7 +93,7 @@ class HRNet(nn.Layer):
         self.feat_channels = [sum(stage4_num_channels)]
         self.feat_channels = [sum(stage4_num_channels)]
 
 
         self.conv_layer1_1 = layers.ConvBNReLU(
         self.conv_layer1_1 = layers.ConvBNReLU(
-            in_channels=3,
+            in_channels=in_channels,
             out_channels=64,
             out_channels=64,
             kernel_size=3,
             kernel_size=3,
             stride=2,
             stride=2,

+ 974 - 0
paddlers/models/ppseg/models/backbones/lite_hrnet.py

@@ -0,0 +1,974 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on
+https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
+"""
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from numbers import Integral
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Normal, Constant
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg import utils
+
+__all__ = [
+    "Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
+    "Lite_HRNet_wider_naive", "LiteHRNet"
+]
+
+
+def Conv2d(in_channels,
+           out_channels,
+           kernel_size,
+           stride=1,
+           padding=0,
+           dilation=1,
+           groups=1,
+           bias=True,
+           weight_init=Normal(std=0.001),
+           bias_init=Constant(0.)):
+    """
+    Create an ``nn.Conv2D`` whose weight and (optional) bias use the given initializers.
+
+    Args:
+        in_channels: Passed to ``nn.Conv2D`` as the input channel count.
+        out_channels: Passed to ``nn.Conv2D`` as the output channel count.
+        kernel_size: Passed to ``nn.Conv2D`` as the kernel size.
+        stride (optional): Convolution stride. Default: 1.
+        padding (optional): Convolution padding. Default: 0.
+        dilation (optional): Convolution dilation. Default: 1.
+        groups (optional): Number of convolution groups. Default: 1.
+        bias (bool, optional): When falsy the layer is built with ``bias_attr=False``. Default: True.
+        weight_init (optional): Initializer of the kernel. Default: Normal(std=0.001).
+        bias_init (optional): Initializer of the bias; only used when ``bias`` is truthy.
+            Default: Constant(0.).
+
+    Returns:
+        nn.Conv2D: The configured convolution layer.
+    """
+    kernel_attr = paddle.framework.ParamAttr(initializer=weight_init)
+    offset_attr = paddle.framework.ParamAttr(
+        initializer=bias_init) if bias else False
+    return nn.Conv2D(
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
+        groups=groups,
+        weight_attr=kernel_attr,
+        bias_attr=offset_attr)
+
+
+def channel_shuffle(x, groups):
+    """
+    Shuffle the channels (axis 1) of a 4-D tensor across ``groups`` groups.
+
+    The channel axis is viewed as ``[groups, channels // groups]``, the two axes are
+    swapped, and the result is reshaped back to the original layout.
+
+    Args:
+        x: Input tensor, indexed here as ``[batch, channels, height, width]``.
+        groups (int): Number of channel groups.
+
+    Returns:
+        The shuffled tensor, reshaped to ``[batch, channels, height, width]``.
+    """
+    # The channel count comes from the static shape; the remaining dims are
+    # read from paddle.shape(x).
+    num_channels = x.shape[1]
+    dims = paddle.shape(x)
+    n, h, w = dims[0], dims[2], dims[3]
+
+    grouped = paddle.reshape(
+        x=x, shape=[n, groups, num_channels // groups, h, w])
+    swapped = paddle.transpose(x=grouped, perm=[0, 2, 1, 3, 4])
+    return paddle.reshape(x=swapped, shape=[n, num_channels, h, w])
+
+
+class ConvNormLayer(nn.Layer):
+    """
+    A convolution optionally followed by a normalization layer and an activation.
+
+    Args:
+        ch_in: Input channel count of the convolution.
+        ch_out: Output channel count of the convolution (and of the norm layer).
+        filter_size (int): Kernel size; the padding is ``(filter_size - 1) // 2``.
+        stride (optional): Stride of the convolution. Default: 1.
+        groups (optional): Groups of the convolution. Default: 1.
+        norm_type (str, optional): One of 'bn', 'sync_bn', 'gn', or None for no
+            normalization. 'bn' and 'sync_bn' both create ``nn.BatchNorm2D``. Default: None.
+        norm_groups (int, optional): Number of groups used when ``norm_type`` is 'gn'. Default: 32.
+        norm_decay (float, optional): L2 decay coefficient of the norm weight and bias. Default: 0.
+        freeze_norm (bool, optional): When True the norm parameters get a zero learning
+            rate and ``stop_gradient=True``, and batch norm is built with
+            ``use_global_stats=True``. Default: False.
+        act (str, optional): 'relu' or 'sigmoid'; any other value applies no activation.
+            Default: None.
+    """
+
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 norm_type=None,
+                 norm_groups=32,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 act=None):
+        super(ConvNormLayer, self).__init__()
+        self.act = act
+        if norm_type is None:
+            # Without normalization the convolution keeps its own bias.
+            self.norm = None
+            conv_bias_attr = True
+        else:
+            assert norm_type in ['bn', 'sync_bn', 'gn'], \
+                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
+            norm_lr = 0. if freeze_norm else 1.
+            param_attr = ParamAttr(
+                initializer=Constant(1.0),
+                learning_rate=norm_lr,
+                regularizer=L2Decay(norm_decay))
+            bias_attr = ParamAttr(
+                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            if norm_type in ['bn', 'sync_bn']:
+                self.norm = nn.BatchNorm2D(
+                    ch_out,
+                    weight_attr=param_attr,
+                    bias_attr=bias_attr,
+                    use_global_stats=True if freeze_norm else None)
+            elif norm_type == 'gn':
+                self.norm = nn.GroupNorm(
+                    num_groups=norm_groups,
+                    num_channels=ch_out,
+                    weight_attr=param_attr,
+                    bias_attr=bias_attr)
+            norm_params = self.norm.parameters()
+            if freeze_norm:
+                for frozen in norm_params:
+                    frozen.stop_gradient = True
+            # The convolution is built without a bias when a norm layer follows.
+            conv_bias_attr = False
+
+        self.conv = nn.Conv2D(
+            in_channels=ch_in,
+            out_channels=ch_out,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(initializer=Normal(
+                mean=0., std=0.001)),
+            bias_attr=conv_bias_attr)
+
+    def forward(self, inputs):
+        """Apply the convolution, then the norm (if any), then the activation (if any)."""
+        out = self.conv(inputs)
+        if self.norm is not None:
+            out = self.norm(out)
+
+        if self.act == 'relu':
+            return F.relu(out)
+        if self.act == 'sigmoid':
+            return F.sigmoid(out)
+        return out
+
+
+class DepthWiseSeparableConvNormLayer(nn.Layer):
+    """
+    A depthwise ``ConvNormLayer`` followed by a 1x1 pointwise ``ConvNormLayer``.
+
+    Args:
+        ch_in: Input channel count; also the width and group count of the depthwise conv.
+        ch_out: Output channel count of the pointwise conv.
+        filter_size (int): Kernel size of the depthwise conv.
+        stride (optional): Stride of the depthwise conv. Default: 1.
+        dw_norm_type (str, optional): ``norm_type`` of the depthwise conv. Default: None.
+        pw_norm_type (str, optional): ``norm_type`` of the pointwise conv. Default: None.
+        norm_decay (float, optional): Forwarded to both layers. Default: 0.
+        freeze_norm (bool, optional): Forwarded to both layers. Default: False.
+        dw_act (str, optional): Activation of the depthwise conv. Default: None.
+        pw_act (str, optional): Activation of the pointwise conv. Default: None.
+    """
+
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride=1,
+                 dw_norm_type=None,
+                 pw_norm_type=None,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 dw_act=None,
+                 pw_act=None):
+        super(DepthWiseSeparableConvNormLayer, self).__init__()
+        # Settings shared by the depthwise and the pointwise convolution.
+        shared = dict(norm_decay=norm_decay, freeze_norm=freeze_norm)
+        self.depthwise_conv = ConvNormLayer(
+            ch_in=ch_in,
+            ch_out=ch_in,
+            filter_size=filter_size,
+            stride=stride,
+            groups=ch_in,
+            norm_type=dw_norm_type,
+            act=dw_act,
+            **shared)
+        self.pointwise_conv = ConvNormLayer(
+            ch_in=ch_in,
+            ch_out=ch_out,
+            filter_size=1,
+            stride=1,
+            norm_type=pw_norm_type,
+            act=pw_act,
+            **shared)
+
+    def forward(self, x):
+        """Run the depthwise convolution and feed its output to the pointwise one."""
+        return self.pointwise_conv(self.depthwise_conv(x))
+
+
+class CrossResolutionWeightingModule(nn.Layer):
+    """
+    Compute per-channel weights jointly from all branches and rescale each branch.
+
+    Args:
+        channels (list): Channel count of every input branch; their sum is the
+            width of the concatenated tensor.
+        ratio (int, optional): Reduction ratio of the hidden 1x1 convolution. Default: 16.
+        norm_type (str, optional): Norm type of both 1x1 convolutions. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to both convolutions. Default: False.
+        norm_decay (float, optional): Forwarded to both convolutions. Default: 0.
+    """
+
+    def __init__(self,
+                 channels,
+                 ratio=16,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(CrossResolutionWeightingModule, self).__init__()
+        self.channels = channels
+        total_channel = sum(channels)
+        hidden_channel = total_channel // ratio
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+        self.conv1 = ConvNormLayer(
+            ch_in=total_channel,
+            ch_out=hidden_channel,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+        self.conv2 = ConvNormLayer(
+            ch_in=hidden_channel,
+            ch_out=total_channel,
+            filter_size=1,
+            stride=1,
+            act='sigmoid',
+            **norm_cfg)
+
+    def forward(self, x):
+        """
+        Args:
+            x (list): One tensor per branch.
+
+        Returns:
+            list: Each input multiplied by its weights, which are resized to the
+                input's spatial size with nearest interpolation.
+        """
+        num_inputs = len(x)
+        # Every branch but the last is average-pooled with a power-of-two
+        # factor that halves with each successive branch.
+        pooled = []
+        for idx, feat in enumerate(x[:-1]):
+            factor = pow(2, num_inputs - idx - 1)
+            pooled.append(
+                F.avg_pool2d(
+                    feat, kernel_size=factor, stride=factor))
+        pooled.append(x[-1])
+
+        weights = self.conv2(self.conv1(paddle.concat(pooled, 1)))
+        weights = paddle.split(weights, self.channels, 1)
+        return [
+            s * F.interpolate(
+                a, paddle.shape(s)[-2:], mode='nearest')
+            for s, a in zip(x, weights)
+        ]
+
+
+class SpatialWeightingModule(nn.Layer):
+    """
+    Rescale the channels of a tensor with weights computed from its global average.
+
+    Args:
+        in_channel (int): Channel count of the input.
+        ratio (int, optional): Reduction ratio of the hidden 1x1 convolution. Default: 16.
+        freeze_norm (bool, optional): Forwarded to both convolutions. Default: False.
+        norm_decay (float, optional): Forwarded to both convolutions. Default: 0.
+    """
+
+    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
+        super(SpatialWeightingModule, self).__init__()
+        hidden_channel = in_channel // ratio
+        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
+        # No norm_type is passed, so both layers are plain convolutions with bias.
+        self.conv1 = ConvNormLayer(
+            ch_in=in_channel,
+            ch_out=hidden_channel,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            freeze_norm=freeze_norm,
+            norm_decay=norm_decay)
+        self.conv2 = ConvNormLayer(
+            ch_in=hidden_channel,
+            ch_out=in_channel,
+            filter_size=1,
+            stride=1,
+            act='sigmoid',
+            freeze_norm=freeze_norm,
+            norm_decay=norm_decay)
+
+    def forward(self, x):
+        """Return ``x`` multiplied by the sigmoid weights derived from its pooled value."""
+        weights = self.conv2(self.conv1(self.global_avgpooling(x)))
+        return x * weights
+
+
+class ConditionalChannelWeightingBlock(nn.Layer):
+    """
+    Multi-branch block: half of each branch's channels pass through unchanged, the
+    other half goes through cross-resolution weighting, a depthwise 3x3 conv and
+    spatial weighting; the halves are then concatenated and channel-shuffled.
+
+    Args:
+        in_channels (list): Channel count of every branch.
+        stride (int): Stride of the depthwise convolutions; must be 1 or 2.
+        reduce_ratio (int): Reduction ratio of the cross-resolution weighting.
+        norm_type (str, optional): Norm type of the weighting and depthwise layers. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to the sub-layers. Default: False.
+        norm_decay (float, optional): Forwarded to the sub-layers. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 stride,
+                 reduce_ratio,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(ConditionalChannelWeightingBlock, self).__init__()
+        assert stride in [1, 2]
+        # Only half of each branch's channels is processed.
+        branch_channels = [channel // 2 for channel in in_channels]
+
+        self.cross_resolution_weighting = CrossResolutionWeightingModule(
+            branch_channels,
+            ratio=reduce_ratio,
+            norm_type=norm_type,
+            freeze_norm=freeze_norm,
+            norm_decay=norm_decay)
+
+        depthwise = []
+        for width in branch_channels:
+            depthwise.append(
+                ConvNormLayer(
+                    width,
+                    width,
+                    filter_size=3,
+                    stride=stride,
+                    groups=width,
+                    norm_type=norm_type,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        self.depthwise_convs = nn.LayerList(depthwise)
+
+        spatial = []
+        for width in branch_channels:
+            spatial.append(
+                SpatialWeightingModule(
+                    width,
+                    ratio=4,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        self.spatial_weighting = nn.LayerList(spatial)
+
+    def forward(self, x):
+        """
+        Args:
+            x (list): One tensor per branch.
+
+        Returns:
+            list: One tensor per branch after weighting, concatenation and shuffle.
+        """
+        # Split every branch into an untouched half and a processed half.
+        x1, x2 = [], []
+        for feat in x:
+            halves = feat.chunk(2, axis=1)
+            x1.append(halves[0])
+            x2.append(halves[1])
+
+        x2 = self.cross_resolution_weighting(x2)
+        x2 = [conv(s) for s, conv in zip(x2, self.depthwise_convs)]
+        x2 = [weigh(s) for s, weigh in zip(x2, self.spatial_weighting)]
+
+        merged = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
+        return [channel_shuffle(s, groups=2) for s in merged]
+
+
+class ShuffleUnit(nn.Layer):
+    """
+    Two-branch unit ending in a channel shuffle.
+
+    With ``stride == 1`` the input is split in half along the channel axis and only
+    the second half is transformed. With ``stride > 1`` both branches consume the
+    full input.
+
+    Args:
+        in_channel (int): Channel count of the input.
+        out_channel (int): Channel count of the output; each branch yields half of it.
+        stride (int): Stride of the depthwise convolutions.
+        norm_type (str, optional): Norm type of every convolution. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to every convolution. Default: False.
+        norm_decay (float, optional): Forwarded to every convolution. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channel,
+                 out_channel,
+                 stride,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(ShuffleUnit, self).__init__()
+        branch_channel = out_channel // 2
+        self.stride = stride
+        # Normalization settings shared by every convolution of the unit.
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+        if self.stride == 1:
+            assert in_channel == branch_channel * 2, \
+                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
+        if stride > 1:
+            # Depthwise 3x3 followed by pointwise 1x1 on the full input.
+            self.branch1 = nn.Sequential(
+                ConvNormLayer(
+                    ch_in=in_channel,
+                    ch_out=in_channel,
+                    filter_size=3,
+                    stride=self.stride,
+                    groups=in_channel,
+                    **norm_cfg),
+                ConvNormLayer(
+                    ch_in=in_channel,
+                    ch_out=branch_channel,
+                    filter_size=1,
+                    stride=1,
+                    act='relu',
+                    **norm_cfg))
+        # Pointwise 1x1 -> depthwise 3x3 -> pointwise 1x1.
+        branch2_in = branch_channel if stride == 1 else in_channel
+        self.branch2 = nn.Sequential(
+            ConvNormLayer(
+                ch_in=branch2_in,
+                ch_out=branch_channel,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=branch_channel,
+                ch_out=branch_channel,
+                filter_size=3,
+                stride=self.stride,
+                groups=branch_channel,
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=branch_channel,
+                ch_out=branch_channel,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg))
+
+    def forward(self, x):
+        """Run both branches, concatenate their outputs and shuffle the channels."""
+        if self.stride > 1:
+            x1, x2 = self.branch1(x), self.branch2(x)
+        else:
+            x1, second_half = x.chunk(2, axis=1)
+            x2 = self.branch2(second_half)
+        return channel_shuffle(paddle.concat([x1, x2], axis=1), groups=2)
+
+
+class IterativeHead(nn.Layer):
+    """
+    Head that walks the branches in reverse order, adding each projected output
+    (bilinearly resized) to the next branch before projecting it in turn.
+
+    Args:
+        in_channels (list): Channel count of every branch, in input order.
+        norm_type (str, optional): Norm type of the projection layers. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to the projection layers. Default: False.
+        norm_decay (float, optional): Forwarded to the projection layers. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(IterativeHead, self).__init__()
+        num_branches = len(in_channels)
+        # Stored reversed because forward() also iterates the branches reversed.
+        self.in_channels = in_channels[::-1]
+
+        last = num_branches - 1
+        projects = []
+        for i in range(num_branches):
+            # Each projection maps to the width of the next branch; the final
+            # one keeps its own width.
+            ch_out = self.in_channels[i] if i == last else self.in_channels[i +
+                                                                            1]
+            projects.append(
+                DepthWiseSeparableConvNormLayer(
+                    ch_in=self.in_channels[i],
+                    ch_out=ch_out,
+                    filter_size=3,
+                    stride=1,
+                    dw_act=None,
+                    pw_act='relu',
+                    dw_norm_type=norm_type,
+                    pw_norm_type=norm_type,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        self.projects = nn.LayerList(projects)
+
+    def forward(self, x):
+        """
+        Args:
+            x (list): One tensor per branch, in the same order as ``in_channels``.
+
+        Returns:
+            list: The projected tensors, in the same order as the inputs.
+        """
+        outputs = []
+        previous = None
+        for i, feat in enumerate(x[::-1]):
+            if previous is not None:
+                # Resize the previous output to this branch before adding it.
+                previous = F.interpolate(
+                    previous,
+                    size=paddle.shape(feat)[-2:],
+                    mode='bilinear',
+                    align_corners=True)
+                feat = feat + previous
+            feat = self.projects[i](feat)
+            outputs.append(feat)
+            previous = feat
+
+        return outputs[::-1]
+
+
+class Stem(nn.Layer):
+    """
+    Network stem: a stride-2 3x3 convolution, then a two-branch stride-2 unit
+    whose outputs are concatenated and channel-shuffled.
+
+    Args:
+        in_channel (int): Channel count of the input.
+        stem_channel (int): Output width of the first convolution.
+        out_channel (int): Channel count of the stem output.
+        expand_ratio (float): Multiplier giving the hidden width of the second branch.
+        norm_type (str, optional): Norm type of every convolution. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to every convolution. Default: False.
+        norm_decay (float, optional): Forwarded to every convolution. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channel,
+                 stem_channel,
+                 out_channel,
+                 expand_ratio,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(Stem, self).__init__()
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+        self.conv1 = ConvNormLayer(
+            in_channel,
+            stem_channel,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            **norm_cfg)
+
+        mid_channel = int(round(stem_channel * expand_ratio))
+        branch_channel = stem_channel // 2
+        # The two branch widths depend on whether the stem keeps its width.
+        keeps_width = stem_channel == out_channel
+        if keeps_width:
+            inc_channel = out_channel - branch_channel
+            linear_channel = branch_channel
+        else:
+            inc_channel = out_channel - stem_channel
+            linear_channel = stem_channel
+
+        self.branch1 = nn.Sequential(
+            ConvNormLayer(
+                ch_in=branch_channel,
+                ch_out=branch_channel,
+                filter_size=3,
+                stride=2,
+                groups=branch_channel,
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=branch_channel,
+                ch_out=inc_channel,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg))
+        self.expand_conv = ConvNormLayer(
+            ch_in=branch_channel,
+            ch_out=mid_channel,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+        self.depthwise_conv = ConvNormLayer(
+            ch_in=mid_channel,
+            ch_out=mid_channel,
+            filter_size=3,
+            stride=2,
+            groups=mid_channel,
+            **norm_cfg)
+        self.linear_conv = ConvNormLayer(
+            ch_in=mid_channel,
+            ch_out=linear_channel,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+
+    def forward(self, x):
+        """Apply the first convolution, run both branches on its halves, merge and shuffle."""
+        first_half, second_half = self.conv1(x).chunk(2, axis=1)
+        first_half = self.branch1(first_half)
+        second_half = self.linear_conv(
+            self.depthwise_conv(self.expand_conv(second_half)))
+        merged = paddle.concat([first_half, second_half], axis=1)
+        return channel_shuffle(merged, groups=2)
+
+
+class LiteHRNetModule(nn.Layer):
+    """
+    One multi-branch module: per-branch blocks followed by optional cross-branch fusion.
+
+    Args:
+        num_branches (int): Number of branches; must equal ``len(in_channels)``.
+        num_blocks (int): Number of blocks stacked in the module.
+        in_channels (list): Channel count of every branch.
+        reduce_ratio (int): Reduction ratio of the 'LITE' weighting blocks.
+        module_type (str): 'LITE' (conditional channel weighting blocks applied to
+            the whole branch list) or 'NAIVE' (independent shuffle units per branch).
+        multiscale_output (bool, optional): If False only one fused output (for the
+            first branch) is produced. Default: False.
+        with_fuse (bool, optional): Whether to fuse the branches. Default: True.
+        norm_type (str, optional): Norm type of the per-branch blocks. The fuse
+            layers always use ``nn.BatchNorm2D``. Default: 'bn'.
+        freeze_norm (bool, optional): Forwarded to the per-branch blocks. Default: False.
+        norm_decay (float, optional): Forwarded to the per-branch blocks. Default: 0.
+    """
+
+    def __init__(self,
+                 num_branches,
+                 num_blocks,
+                 in_channels,
+                 reduce_ratio,
+                 module_type,
+                 multiscale_output=False,
+                 with_fuse=True,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(LiteHRNetModule, self).__init__()
+        assert num_branches == len(in_channels),\
+            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
+        assert module_type in [
+            'LITE', 'NAIVE'
+        ], "module_type should be one of ['LITE', 'NAIVE']"
+        self.num_branches = num_branches
+        self.in_channels = in_channels
+        self.multiscale_output = multiscale_output
+        self.with_fuse = with_fuse
+        # Honour the constructor argument; it used to be overwritten with 'bn'.
+        self.norm_type = norm_type
+        self.module_type = module_type
+
+        if self.module_type == 'LITE':
+            self.layers = self._make_weighting_blocks(
+                num_blocks,
+                reduce_ratio,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+        elif self.module_type == 'NAIVE':
+            self.layers = self._make_naive_branches(
+                num_branches,
+                num_blocks,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+
+        if self.with_fuse:
+            self.fuse_layers = self._make_fuse_layers(
+                freeze_norm=freeze_norm, norm_decay=norm_decay)
+            self.relu = nn.ReLU()
+
+    def _make_weighting_blocks(self,
+                               num_blocks,
+                               reduce_ratio,
+                               stride=1,
+                               freeze_norm=False,
+                               norm_decay=0.):
+        """Stack ``num_blocks`` conditional channel weighting blocks into a Sequential."""
+        layers = []
+        for i in range(num_blocks):
+            layers.append(
+                ConditionalChannelWeightingBlock(
+                    self.in_channels,
+                    stride=stride,
+                    reduce_ratio=reduce_ratio,
+                    norm_type=self.norm_type,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        return nn.Sequential(*layers)
+
+    def _make_naive_branches(self,
+                             num_branches,
+                             num_blocks,
+                             freeze_norm=False,
+                             norm_decay=0.):
+        """Build, for every branch, a Sequential of ``num_blocks`` stride-1 shuffle units."""
+        branches = []
+        for branch_idx in range(num_branches):
+            layers = []
+            for i in range(num_blocks):
+                layers.append(
+                    ShuffleUnit(
+                        self.in_channels[branch_idx],
+                        self.in_channels[branch_idx],
+                        stride=1,
+                        norm_type=self.norm_type,
+                        freeze_norm=freeze_norm,
+                        norm_decay=norm_decay))
+            branches.append(nn.Sequential(*layers))
+        return nn.LayerList(branches)
+
+    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
+        """
+        Build the layers that map branch ``j`` onto output branch ``i``.
+
+        For ``j > i`` a 1x1 conv, batch norm and nearest upsampling by ``2**(j - i)``
+        are used; for ``j == i`` the entry is None; for ``j < i`` a chain of
+        ``i - j`` stride-2 depthwise-separable steps is used, of which only the last
+        changes the channel count. Returns None when there is a single branch.
+        ``freeze_norm`` and ``norm_decay`` are accepted but not used here.
+        """
+        if self.num_branches == 1:
+            return None
+        fuse_layers = []
+        num_out_branches = self.num_branches if self.multiscale_output else 1
+        for i in range(num_out_branches):
+            fuse_layer = []
+            for j in range(self.num_branches):
+                if j > i:
+                    fuse_layer.append(
+                        nn.Sequential(
+                            Conv2d(
+                                self.in_channels[j],
+                                self.in_channels[i],
+                                kernel_size=1,
+                                stride=1,
+                                padding=0,
+                                bias=False, ),
+                            nn.BatchNorm2D(self.in_channels[i]),
+                            nn.Upsample(
+                                scale_factor=2**(j - i), mode='nearest')))
+                elif j == i:
+                    fuse_layer.append(None)
+                else:
+                    conv_downsamples = []
+                    for k in range(i - j):
+                        if k == i - j - 1:
+                            # Last step: switch to the target width, no ReLU.
+                            conv_downsamples.append(
+                                nn.Sequential(
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=3,
+                                        stride=2,
+                                        padding=1,
+                                        groups=self.in_channels[j],
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[i],
+                                        kernel_size=1,
+                                        stride=1,
+                                        padding=0,
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[i])))
+                        else:
+                            # Intermediate step: keep the source width, end with ReLU.
+                            conv_downsamples.append(
+                                nn.Sequential(
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=3,
+                                        stride=2,
+                                        padding=1,
+                                        groups=self.in_channels[j],
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=1,
+                                        stride=1,
+                                        padding=0,
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    nn.ReLU()))
+
+                    fuse_layer.append(nn.Sequential(*conv_downsamples))
+            fuse_layers.append(nn.LayerList(fuse_layer))
+
+        return nn.LayerList(fuse_layers)
+
+    def forward(self, x):
+        """
+        Args:
+            x (list): One tensor per branch.
+
+        Returns:
+            list: The fused outputs when ``with_fuse`` is set; otherwise the branch
+                outputs, reduced to the first one unless ``multiscale_output`` is set.
+        """
+        if self.num_branches == 1:
+            return [self.layers[0](x[0])]
+        if self.module_type == 'LITE':
+            out = self.layers(x)
+        elif self.module_type == 'NAIVE':
+            # Note: the branch outputs are written back into the input list.
+            for i in range(self.num_branches):
+                x[i] = self.layers[i](x[i])
+            out = x
+        if self.with_fuse:
+            out_fuse = []
+            for i in range(len(self.fuse_layers)):
+                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
+                for j in range(self.num_branches):
+                    # NOTE(review): for j == 0 the running sum is doubled rather
+                    # than adding a fresh term; this presumably mirrors the
+                    # reference implementation named in the file header -
+                    # confirm before changing, pretrained weights may rely on it.
+                    if j == 0:
+                        y += y
+                    elif i == j:
+                        y += out[j]
+                    else:
+                        y += self.fuse_layers[i][j](out[j])
+                    # While fusing output 0 the running sum is stored back into
+                    # out[0], so later outputs read the updated value.
+                    if i == 0:
+                        out[i] = y
+                out_fuse.append(self.relu(y))
+            out = out_fuse
+        elif not self.multiscale_output:
+            out = [out[0]]
+        return out
+
+
+class LiteHRNet(nn.Layer):
+    """
+    @inproceedings{Yulitehrnet21,
+    title={Lite-HRNet: A Lightweight High-Resolution Network},
+        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+        booktitle={CVPR},year={2021}
+    }
+
+    Args:
+        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
+            "naive": Simply combining the shuffle block in ShuffleNet and the high-resolution design pattern in HRNet.
+            "wider_naive": Naive network with wider channels in each block.
+            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
+            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        freeze_at (int): the stage to freeze
+        freeze_norm (bool): whether to freeze norm in HRNet
+        norm_decay (float): weight decay for normalization layer weights
+        return_idx (List): the stage to return
+    """
+
+    def __init__(self,
+                 network_type,
+                 in_channels=3,
+                 freeze_at=0,
+                 freeze_norm=True,
+                 norm_decay=0.,
+                 return_idx=[0, 1, 2, 3],
+                 use_head=False,
+                 pretrained=None):
+        """
+        Build the stem, three transition/stage pairs and the optional iterative head.
+
+        Args:
+            network_type (str): One of "lite_18", "lite_30", "naive", "wider_naive".
+            in_channels (int, optional): Channels of the input image. Default: 3.
+            freeze_at (int, optional): Stored on the instance as ``freeze_at``. Default: 0.
+            freeze_norm (bool, optional): Forwarded to the transitions, stages and head.
+                Default: True.
+            norm_decay (float, optional): Forwarded to the transitions, stages and head.
+                Default: 0.
+            return_idx (int|list, optional): Stage indices to return; a single int is
+                wrapped into a list. Default: [0, 1, 2, 3].
+            use_head (bool, optional): Whether to append an ``IterativeHead``. Default: False.
+            pretrained (str, optional): Passed to ``utils.load_entire_model`` by
+                ``init_weight`` when not None. Default: None.
+        """
+        super(LiteHRNet, self).__init__()
+        if isinstance(return_idx, Integral):
+            return_idx = [return_idx]
+        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
+            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
+        assert len(return_idx) > 0, "need one or more return index"
+        self.freeze_at = freeze_at
+        self.freeze_norm = freeze_norm
+        self.norm_decay = norm_decay
+        # Copy so the instance never aliases the shared default list (or the
+        # caller's list); mutating one model's return_idx must not leak.
+        self.return_idx = list(return_idx)
+        self.norm_type = 'bn'
+        self.use_head = use_head
+        self.pretrained = pretrained
+
+        self.module_configs = {
+            "lite_18": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["LITE", "LITE", "LITE"],
+                "reduce_ratios": [8, 8, 8],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+            "lite_30": {
+                "num_modules": [3, 8, 3],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["LITE", "LITE", "LITE"],
+                "reduce_ratios": [8, 8, 8],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+            "naive": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
+                "reduce_ratios": [1, 1, 1],
+                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
+            },
+            "wider_naive": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
+                "reduce_ratios": [1, 1, 1],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+        }
+
+        self.stages_config = self.module_configs[network_type]
+
+        self.stem = Stem(in_channels, 32, 32, 1)
+        num_channels_pre_layer = [32]
+        # Each stage is preceded by a transition that adapts the previous
+        # stage's branches to the new branch layout.
+        for stage_idx in range(3):
+            num_channels = self.stages_config["num_channels"][stage_idx]
+            setattr(self, 'transition{}'.format(stage_idx),
+                    self._make_transition_layer(num_channels_pre_layer,
+                                                num_channels, self.freeze_norm,
+                                                self.norm_decay))
+            stage, num_channels_pre_layer = self._make_stage(
+                self.stages_config, stage_idx, num_channels, True,
+                self.freeze_norm, self.norm_decay)
+            setattr(self, 'stage{}'.format(stage_idx), stage)
+
+        num_channels = self.stages_config["num_channels"][-1]
+        self.feat_channels = num_channels
+
+        if self.use_head:
+            self.head_layer = IterativeHead(num_channels_pre_layer,
+                                            self.norm_type, self.freeze_norm,
+                                            self.norm_decay)
+
+            # With the head, every branch after the first reports half of its
+            # stage width.
+            self.feat_channels = [num_channels[0]]
+            for i in range(1, len(num_channels)):
+                self.feat_channels.append(num_channels[i] // 2)
+
+        self.init_weight()
+
+    def init_weight(self):
+        """Load the weights named by ``self.pretrained``; do nothing when it is None."""
+        if self.pretrained is None:
+            return
+        utils.load_entire_model(self, self.pretrained)
+
+    def _make_transition_layer(self,
+                               num_channels_pre_layer,
+                               num_channels_cur_layer,
+                               freeze_norm=False,
+                               norm_decay=0.):
+        """
+        Build the layers adapting the previous stage's branches to the current ones.
+
+        For a branch that already exists, the entry is None when its width is
+        unchanged, otherwise a stride-1 depthwise 3x3 + pointwise 1x1 block. Each
+        new branch is derived from the last previous branch through a chain of
+        stride-2 depthwise-separable blocks, of which only the final one switches
+        to the new width. ``freeze_norm`` and ``norm_decay`` are accepted but not
+        used here.
+
+        Args:
+            num_channels_pre_layer (list): Branch widths of the previous stage.
+            num_channels_cur_layer (list): Branch widths of the current stage.
+
+        Returns:
+            nn.LayerList: One entry (layer or None) per current branch.
+        """
+        num_branches_pre = len(num_channels_pre_layer)
+        transition_layers = []
+        for i, cur_channel in enumerate(num_channels_cur_layer):
+            if i < num_branches_pre:
+                pre_channel = num_channels_pre_layer[i]
+                if cur_channel == pre_channel:
+                    transition_layers.append(None)
+                    continue
+                transition_layers.append(
+                    nn.Sequential(
+                        Conv2d(
+                            pre_channel,
+                            pre_channel,
+                            kernel_size=3,
+                            stride=1,
+                            padding=1,
+                            groups=pre_channel,
+                            bias=False),
+                        nn.BatchNorm2D(pre_channel),
+                        Conv2d(
+                            pre_channel,
+                            cur_channel,
+                            kernel_size=1,
+                            stride=1,
+                            padding=0,
+                            bias=False),
+                        nn.BatchNorm2D(cur_channel),
+                        nn.ReLU()))
+                continue
+
+            # New branch: downsample the last previous branch step by step.
+            last_pre = num_channels_pre_layer[-1]
+            num_steps = i + 1 - num_branches_pre
+            conv_downsamples = []
+            for j in range(num_steps):
+                # Only the final step changes the channel count.
+                out_channel = cur_channel if j == num_steps - 1 else last_pre
+                conv_downsamples.append(
+                    nn.Sequential(
+                        Conv2d(
+                            last_pre,
+                            last_pre,
+                            groups=last_pre,
+                            kernel_size=3,
+                            stride=2,
+                            padding=1,
+                            bias=False),
+                        nn.BatchNorm2D(last_pre),
+                        Conv2d(
+                            last_pre,
+                            out_channel,
+                            kernel_size=1,
+                            stride=1,
+                            padding=0,
+                            bias=False),
+                        nn.BatchNorm2D(out_channel),
+                        nn.ReLU()))
+            transition_layers.append(nn.Sequential(*conv_downsamples))
+        return nn.LayerList(transition_layers)
+
+    def _make_stage(self,
+                    stages_config,
+                    stage_idx,
+                    in_channels,
+                    multiscale_output,
+                    freeze_norm=False,
+                    norm_decay=0.):
+        """Assemble the ``LiteHRNetModule`` chain for one stage.
+
+        Args:
+            stages_config (dict): Per-stage settings; the entry at
+                ``stage_idx`` is read from each of its lists.
+            stage_idx (int): Index of the stage to build.
+            in_channels: Channel description handed to the first module.
+            multiscale_output: When falsy, the last module of the stage is
+                built with ``multiscale_output=False``.
+            freeze_norm (bool, optional): Forwarded to every module.
+            norm_decay (float, optional): Forwarded to every module.
+
+        Returns:
+            tuple: ``(nn.Sequential of the modules, in_channels of the last
+            module)``.
+        """
+        num_modules = stages_config["num_modules"][stage_idx]
+        num_branches = stages_config["num_branches"][stage_idx]
+        num_blocks = stages_config["num_blocks"][stage_idx]
+        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
+        module_type = stages_config['module_type'][stage_idx]
+
+        last_module_idx = num_modules - 1
+        stage_modules = []
+        for module_idx in range(num_modules):
+            # Every module keeps multi-scale output except, optionally, the last.
+            keep_multiscale = (bool(multiscale_output) or
+                               module_idx != last_module_idx)
+            module = LiteHRNetModule(
+                num_branches,
+                num_blocks,
+                in_channels,
+                reduce_ratio,
+                module_type,
+                multiscale_output=keep_multiscale,
+                with_fuse=True,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+            stage_modules.append(module)
+            # The next module consumes whatever this one reports.
+            in_channels = module.in_channels
+        return nn.Sequential(*stage_modules), in_channels
+
+    def forward(self, x):
+        """Run the stem and the three stages, then pick the requested outputs.
+
+        Args:
+            x: Input passed to ``self.stem``.
+
+        Returns:
+            list: The outputs whose index is in ``self.return_idx``, in order.
+        """
+        feats = [self.stem(x)]
+        for stage_idx in range(3):
+            stage_inputs = []
+            transition = getattr(self, 'transition{}'.format(stage_idx))
+            num_branches = self.stages_config["num_branches"][stage_idx]
+            for branch in range(num_branches):
+                adapter = transition[branch]
+                if adapter is None:
+                    # No adaptation registered: pass the branch through as is.
+                    stage_inputs.append(feats[branch])
+                    continue
+                # A branch with no previous counterpart is fed from the last one.
+                source = feats[-1] if branch >= len(feats) else feats[branch]
+                stage_inputs.append(adapter(source))
+            feats = getattr(self, 'stage{}'.format(stage_idx))(stage_inputs)
+
+        if self.use_head:
+            feats = self.head_layer(feats)
+
+        selected = []
+        for idx, feat in enumerate(feats):
+            if idx == self.freeze_at:
+                # Stop gradients at the configured output index.
+                feat.stop_gradient = True
+            if idx in self.return_idx:
+                selected.append(feat)
+        return selected
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_18(**kwargs):
+    """Build a ``LiteHRNet`` with ``network_type="lite_18"``, forwarding ``kwargs``."""
+    return LiteHRNet(network_type="lite_18", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_30(**kwargs):
+    """Build a ``LiteHRNet`` with ``network_type="lite_30"``, forwarding ``kwargs``."""
+    return LiteHRNet(network_type="lite_30", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_naive(**kwargs):
+    """Build a ``LiteHRNet`` with ``network_type="naive"``, forwarding ``kwargs``."""
+    return LiteHRNet(network_type="naive", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_wider_naive(**kwargs):
+    """Build a ``LiteHRNet`` with ``network_type="wider_naive"``, forwarding ``kwargs``."""
+    return LiteHRNet(network_type="wider_naive", **kwargs)

+ 3 - 3
paddlers/models/ppseg/models/backbones/mix_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  img_size=224,
                  img_size=224,
                  patch_size=16,
                  patch_size=16,
-                 in_chans=3,
+                 in_channels=3,
                  num_classes=1000,
                  num_classes=1000,
                  embed_dims=[64, 128, 256, 512],
                  embed_dims=[64, 128, 256, 512],
                  num_heads=[1, 2, 4, 8],
                  num_heads=[1, 2, 4, 8],
@@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
             img_size=img_size,
             img_size=img_size,
             patch_size=7,
             patch_size=7,
             stride=4,
             stride=4,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dims[0])
             embed_dim=embed_dims[0])
         self.patch_embed2 = OverlapPatchEmbed(
         self.patch_embed2 = OverlapPatchEmbed(
             img_size=img_size // 4,
             img_size=img_size // 4,

+ 215 - 114
paddlers/models/ppseg/models/backbones/mobilenetv2.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -12,13 +12,26 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
+import paddle
+from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 
 
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg import utils
 from paddlers.models.ppseg import utils
 
 
+__all__ = [
+    "MobileNetV2_x0_25",
+    "MobileNetV2_x0_5",
+    "MobileNetV2_x0_75",
+    "MobileNetV2_x1_0",
+    "MobileNetV2_x1_5",
+    "MobileNetV2_x2_0",
+]
+
 
 
-@manager.BACKBONES.add_component
 class MobileNetV2(nn.Layer):
 class MobileNetV2(nn.Layer):
     """
     """
         The MobileNetV2 implementation based on PaddlePaddle.
         The MobileNetV2 implementation based on PaddlePaddle.
@@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
         (https://arxiv.org/abs/1801.04381).
         (https://arxiv.org/abs/1801.04381).
 
 
         Args:
         Args:
-            channel_ratio (float, optional): The ratio of channel. Default: 1.0
-            min_channel (int, optional): The minimum of channel. Default: 16
+            scale (float, optional): The scale of channel. Default: 1.0
+            in_channels (int, optional): The channels of input image. Default: 3.
             pretrained (str, optional): The path or url of pretrained model. Default: None
             pretrained (str, optional): The path or url of pretrained model. Default: None
         """
         """
 
 
-    def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
-        super(MobileNetV2, self).__init__()
-        self.channel_ratio = channel_ratio
-        self.min_channel = min_channel
+    def __init__(self, scale=1.0, in_channels=3, pretrained=None):
+        super().__init__()
+        self.scale = scale
         self.pretrained = pretrained
         self.pretrained = pretrained
+        prefix_name = ""
 
 
-        self.stage0 = conv_bn(3, self.depth(32), 3, 2)
-
-        self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
-
-        self.stage2 = nn.Sequential(
-            InvertedResidual(self.depth(16), self.depth(24), 2, 6),
-            InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
-
-        self.stage3 = nn.Sequential(
-            InvertedResidual(self.depth(24), self.depth(32), 2, 6),
-            InvertedResidual(self.depth(32), self.depth(32), 1, 6),
-            InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
+        bottleneck_params_list = [
+            (1, 16, 1, 1),
+            (6, 24, 2, 2),  # x4
+            (6, 32, 3, 2),  # x8
+            (6, 64, 4, 2),
+            (6, 96, 3, 1),  # x16
+            (6, 160, 3, 2),
+            (6, 320, 1, 1),  # x32
+        ]
+        self.out_index = [1, 2, 4, 6]
 
 
-        self.stage4 = nn.Sequential(
-            InvertedResidual(self.depth(32), self.depth(64), 2, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
+        self.conv1 = ConvBNLayer(
+            num_channels=in_channels,
+            num_filters=int(32 * scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            name=prefix_name + "conv1_1")
 
 
-        self.stage5 = nn.Sequential(
-            InvertedResidual(self.depth(64), self.depth(96), 1, 6),
-            InvertedResidual(self.depth(96), self.depth(96), 1, 6),
-            InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
+        self.block_list = []
+        i = 1
+        in_c = int(32 * scale)
+        for layer_setting in bottleneck_params_list:
+            t, c, n, s = layer_setting
+            i += 1
+            block = self.add_sublayer(
+                prefix_name + "conv" + str(i),
+                sublayer=InvresiBlocks(
+                    in_c=in_c,
+                    t=t,
+                    c=int(c * scale),
+                    n=n,
+                    s=s,
+                    name=prefix_name + "conv" + str(i)))
+            self.block_list.append(block)
+            in_c = int(c * scale)
 
 
-        self.stage6 = nn.Sequential(
-            InvertedResidual(self.depth(96), self.depth(160), 2, 6),
-            InvertedResidual(self.depth(160), self.depth(160), 1, 6),
-            InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
-
-        self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
+        out_channels = [
+            bottleneck_params_list[idx][1] for idx in self.out_index
+        ]
+        self.feat_channels = [int(c * scale) for c in out_channels]
 
 
         self.init_weight()
         self.init_weight()
 
 
-    def depth(self, channels):
-        min_channel = min(channels, self.min_channel)
-        return max(min_channel, int(channels * self.channel_ratio))
-
-    def forward(self, x):
+    def forward(self, inputs):
         feat_list = []
         feat_list = []
 
 
-        feature_1_2 = self.stage0(x)
-        feature_1_2 = self.stage1(feature_1_2)
-        feature_1_4 = self.stage2(feature_1_2)
-        feature_1_8 = self.stage3(feature_1_4)
-        feature_1_16 = self.stage4(feature_1_8)
-        feature_1_16 = self.stage5(feature_1_16)
-        feature_1_32 = self.stage6(feature_1_16)
-        feature_1_32 = self.stage7(feature_1_32)
-        feat_list.append(feature_1_4)
-        feat_list.append(feature_1_8)
-        feat_list.append(feature_1_16)
-        feat_list.append(feature_1_32)
+        y = self.conv1(inputs, if_act=True)
+        for idx, block in enumerate(self.block_list):
+            y = block(y)
+            if idx in self.out_index:
+                feat_list.append(y)
+
         return feat_list
         return feat_list
 
 
     def init_weight(self):
     def init_weight(self):
@@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
             utils.load_entire_model(self, self.pretrained)
             utils.load_entire_model(self, self.pretrained)
 
 
 
 
-def conv_bn(inp, oup, kernel, stride):
-    return nn.Sequential(
-        nn.Conv2D(
-            in_channels=inp,
-            out_channels=oup,
-            kernel_size=kernel,
+class ConvBNLayer(nn.Layer):
+    """Bias-free ``Conv2D`` followed by ``BatchNorm`` and an optional ReLU6.
+
+    Args:
+        num_channels (int): Input channels of the convolution.
+        filter_size (int): Kernel size of the convolution.
+        num_filters (int): Output channels of the convolution.
+        stride (int): Stride of the convolution.
+        padding (int): Padding of the convolution.
+        channels: Accepted but not referenced in this class.
+        num_groups (int, optional): Groups of the convolution. Default: 1.
+        name (str): Prefix for the parameter names. It is concatenated with
+            string suffixes, so the ``None`` default cannot be used as is.
+        use_cudnn (bool, optional): Accepted but not referenced in this class.
+    """
+
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 name=None,
+                 use_cudnn=True):
+        super(ConvBNLayer, self).__init__()
+
+        # Parameters are named explicitly from the `name` prefix.
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")
+
+    def forward(self, inputs, if_act=True):
+        """Apply conv and batch norm, then ReLU6 when ``if_act`` is truthy."""
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        if if_act:
+            y = F.relu6(y)
+        return y
+
+
+class InvertedResidualUnit(nn.Layer):
+    def __init__(self, num_channels, num_in_filter, num_filters, stride,
+                 filter_size, padding, expansion_factor, name):
+        super(InvertedResidualUnit, self).__init__()
+        num_expfilter = int(round(num_in_filter * expansion_factor))
+        self._expand_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_expfilter,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_expand")
+
+        self._bottleneck_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_expfilter,
+            filter_size=filter_size,
             stride=stride,
             stride=stride,
-            padding=(kernel - 1) // 2,
-            bias_attr=False),
-        nn.BatchNorm2D(
-            num_features=oup, epsilon=1e-05, momentum=0.1),
-        nn.ReLU())
-
-
-class InvertedResidual(nn.Layer):
-    def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
-        super(InvertedResidual, self).__init__()
-        self.stride = stride
-        assert stride in [1, 2]
-        self.use_res_connect = self.stride == 1 and inp == oup
-
-        self.conv = nn.Sequential(
-            nn.Conv2D(
-                inp,
-                inp * expand_ratio,
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                dilation=1,
-                groups=1,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
-            nn.ReLU(),
-            nn.Conv2D(
-                inp * expand_ratio,
-                inp * expand_ratio,
-                kernel_size=3,
-                stride=stride,
-                padding=dilation,
-                dilation=dilation,
-                groups=inp * expand_ratio,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
-            nn.ReLU(),
-            nn.Conv2D(
-                inp * expand_ratio,
-                oup,
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                dilation=1,
-                groups=1,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=oup, epsilon=1e-05, momentum=0.1), )
-
-    def forward(self, x):
-        if self.use_res_connect:
-            return x + self.conv(x)
-        else:
-            return self.conv(x)
+            padding=padding,
+            num_groups=num_expfilter,
+            use_cudnn=False,
+            name=name + "_dwise")
+
+        self._linear_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_filters,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_linear")
+
+    def forward(self, inputs, ifshortcut):
+        y = self._expand_conv(inputs, if_act=True)
+        y = self._bottleneck_conv(y, if_act=True)
+        y = self._linear_conv(y, if_act=False)
+        if ifshortcut:
+            y = paddle.add(inputs, y)
+        return y
+
+
+class InvresiBlocks(nn.Layer):
+    """A group of ``n`` ``InvertedResidualUnit`` layers applied in sequence.
+
+    Args:
+        in_c (int): Input channels of the first unit.
+        t: Expansion factor passed to every unit.
+        c (int): Output channels of every unit.
+        n (int): Number of units in the group.
+        s (int): Stride of the first unit; the remaining units use stride 1.
+        name (str): Prefix used for the unit and sublayer names.
+    """
+
+    def __init__(self, in_c, t, c, n, s, name):
+        super(InvresiBlocks, self).__init__()
+
+        # The first unit changes channels (in_c -> c) and carries the stride.
+        self._first_block = InvertedResidualUnit(
+            num_channels=in_c,
+            num_in_filter=in_c,
+            num_filters=c,
+            stride=s,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t,
+            name=name + "_1")
+
+        # Plain Python list; each unit is registered through add_sublayer.
+        self._block_list = []
+        for i in range(1, n):
+            block = self.add_sublayer(
+                name + "_" + str(i + 1),
+                sublayer=InvertedResidualUnit(
+                    num_channels=c,
+                    num_in_filter=c,
+                    num_filters=c,
+                    stride=1,
+                    filter_size=3,
+                    padding=1,
+                    expansion_factor=t,
+                    name=name + "_" + str(i + 1)))
+            self._block_list.append(block)
+
+    def forward(self, inputs):
+        """Run the first unit without a shortcut, then the rest with one."""
+        y = self._first_block(inputs, ifshortcut=False)
+        for block in self._block_list:
+            y = block(y, ifshortcut=True)
+        return y
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_25(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=0.25``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=0.25, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_5(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=0.5``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=0.5, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_75(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=0.75``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=0.75, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x1_0(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=1.0``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=1.0, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x1_5(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=1.5``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=1.5, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x2_0(**kwargs):
+    """Build a ``MobileNetV2`` with ``scale=2.0``, forwarding ``kwargs``."""
+    return MobileNetV2(scale=2.0, **kwargs)

+ 315 - 181
paddlers/models/ppseg/models/backbones/mobilenetv3.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -14,10 +14,12 @@
 
 
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
-import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
 
 
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddlers.models.ppseg.utils import utils, logger
 from paddlers.models.ppseg.models import layers
 from paddlers.models.ppseg.models import layers
 
 
 __all__ = [
 __all__ = [
@@ -28,8 +30,92 @@ __all__ = [
     "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
     "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
 ]
 ]
 
 
-
-def make_divisible(v, divisor=8, min_value=None):
+MODEL_STAGES_PATTERN = {
+    "MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
+    "MobileNetV3_large":
+    ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
+}
+
+# "large" and "small" are the configs for MobileNetV3_large and MobileNetV3_small respectively.
+# Each config is a list. Each element (itself a list) represents a depthwise block, which is composed of k, exp, c, se, act, s, optionally followed by d.
+# k: kernel_size
+# exp: middle channel number in depthwise block
+# c: output channel number in depthwise block
+# se: whether to use SE block
+# act: which activation to use
+# s: stride in depthwise block
+# d: dilation rate in depthwise block
+NET_CONFIG = {
+    "large": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, False, "relu", 1],
+        [3, 64, 24, False, "relu", 2],
+        [3, 72, 24, False, "relu", 1],  # x4
+        [5, 72, 40, True, "relu", 2],
+        [5, 120, 40, True, "relu", 1],
+        [5, 120, 40, True, "relu", 1],  # x8
+        [3, 240, 80, False, "hardswish", 2],
+        [3, 200, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 480, 112, True, "hardswish", 1],
+        [3, 672, 112, True, "hardswish", 1],  # x16
+        [5, 672, 160, True, "hardswish", 2],
+        [5, 960, 160, True, "hardswish", 1],
+        [5, 960, 160, True, "hardswish", 1],  # x32
+    ],
+    "small": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, True, "relu", 2],
+        [3, 72, 24, False, "relu", 2],
+        [3, 88, 24, False, "relu", 1],
+        [5, 96, 40, True, "hardswish", 2],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 120, 48, True, "hardswish", 1],
+        [5, 144, 48, True, "hardswish", 1],
+        [5, 288, 96, True, "hardswish", 2],
+        [5, 576, 96, True, "hardswish", 1],
+        [5, 576, 96, True, "hardswish", 1],
+    ],
+    "large_os8": [
+        # k, exp, c, se, act, s, {d}
+        [3, 16, 16, False, "relu", 1],
+        [3, 64, 24, False, "relu", 2],
+        [3, 72, 24, False, "relu", 1],  # x4
+        [5, 72, 40, True, "relu", 2],
+        [5, 120, 40, True, "relu", 1],
+        [5, 120, 40, True, "relu", 1],  # x8
+        [3, 240, 80, False, "hardswish", 1],
+        [3, 200, 80, False, "hardswish", 1, 2],
+        [3, 184, 80, False, "hardswish", 1, 2],
+        [3, 184, 80, False, "hardswish", 1, 2],
+        [3, 480, 112, True, "hardswish", 1, 2],
+        [3, 672, 112, True, "hardswish", 1, 2],
+        [5, 672, 160, True, "hardswish", 1, 2],
+        [5, 960, 160, True, "hardswish", 1, 4],
+        [5, 960, 160, True, "hardswish", 1, 4],
+    ],
+    "small_os8": [
+        # k, exp, c, se, act, s, {d}
+        [3, 16, 16, True, "relu", 2],
+        [3, 72, 24, False, "relu", 2],
+        [3, 88, 24, False, "relu", 1],
+        [5, 96, 40, True, "hardswish", 1],
+        [5, 240, 40, True, "hardswish", 1, 2],
+        [5, 240, 40, True, "hardswish", 1, 2],
+        [5, 120, 48, True, "hardswish", 1, 2],
+        [5, 144, 48, True, "hardswish", 1, 2],
+        [5, 288, 96, True, "hardswish", 1, 2],
+        [5, 576, 96, True, "hardswish", 1, 4],
+        [5, 576, 96, True, "hardswish", 1, 4],
+    ]
+}
+
+OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
+
+
+def _make_divisible(v, divisor=8, min_value=None):
     if min_value is None:
     if min_value is None:
         min_value = divisor
         min_value = divisor
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
     return new_v
     return new_v
 
 
 
 
-class MobileNetV3(nn.Layer):
-    """
-    The MobileNetV3 implementation based on PaddlePaddle.
+def _create_act(act):
+    """Return the activation layer selected by ``act``.
+
+    ``"hardswish"`` gives ``nn.Hardswish()``, ``"relu"`` gives ``nn.ReLU()``
+    and ``None`` gives ``None``. Any other value raises ``RuntimeError``.
+    """
+    if act == "hardswish":
+        return nn.Hardswish()
+    if act == "relu":
+        return nn.ReLU()
+    if act is None:
+        return None
+    raise RuntimeError(
+        "The activation function is not supported: {}".format(act))
 
 
-    The original article refers to Jingdong
-    Andrew Howard, et, al. "Searching for MobileNetV3"
-    (https://arxiv.org/pdf/1905.02244.pdf).
 
 
+class MobileNetV3(nn.Layer):
+    """
+    MobileNetV3
     Args:
     Args:
-        pretrained (str, optional): The path of pretrained model.
-        scale (float, optional): The scale of channels . Default: 1.0.
-        model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Defualt: 'small'.
-        output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
-
+        config: list. MobileNetV3 depthwise blocks config.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        scale: float=1.0. The coefficient that controls the size of network parameters. 
+    Returns:
+        model: nn.Layer. Specific MobileNetV3 model depends on args.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 pretrained=None,
+                 config,
+                 stages_pattern,
+                 out_index,
+                 in_channels=3,
                  scale=1.0,
                  scale=1.0,
-                 model_name="small",
-                 output_stride=None):
-        super(MobileNetV3, self).__init__()
+                 pretrained=None):
+        super().__init__()
 
 
+        self.cfg = config
+        self.out_index = out_index
+        self.scale = scale
+        self.pretrained = pretrained
         inplanes = 16
         inplanes = 16
-        if model_name == "large":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, "relu", 1],
-                [3, 64, 24, False, "relu", 2],
-                [3, 72, 24, False, "relu", 1],  # output 1 -> out_index=2
-                [5, 72, 40, True, "relu", 2],
-                [5, 120, 40, True, "relu", 1],
-                [5, 120, 40, True, "relu", 1],  # output 2 -> out_index=5
-                [3, 240, 80, False, "hard_swish", 2],
-                [3, 200, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 480, 112, True, "hard_swish", 1],
-                [3, 672, 112, True, "hard_swish",
-                 1],  # output 3 -> out_index=11
-                [5, 672, 160, True, "hard_swish", 2],
-                [5, 960, 160, True, "hard_swish", 1],
-                [5, 960, 160, True, "hard_swish",
-                 1],  # output 3 -> out_index=14
-            ]
-            self.out_indices = [2, 5, 11, 14]
-            self.feat_channels = [
-                make_divisible(i * scale) for i in [24, 40, 112, 160]
-            ]
-
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
-        elif model_name == "small":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, "relu", 2],  # output 1 -> out_index=0
-                [3, 72, 24, False, "relu", 2],
-                [3, 88, 24, False, "relu", 1],  # output 2 -> out_index=3
-                [5, 96, 40, True, "hard_swish", 2],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 120, 48, True, "hard_swish", 1],
-                [5, 144, 48, True, "hard_swish", 1],  # output 3 -> out_index=7
-                [5, 288, 96, True, "hard_swish", 2],
-                [5, 576, 96, True, "hard_swish", 1],
-                [5, 576, 96, True, "hard_swish", 1],  # output 4 -> out_index=10
-            ]
-            self.out_indices = [0, 3, 7, 10]
-            self.feat_channels = [
-                make_divisible(i * scale) for i in [16, 24, 48, 96]
-            ]
-
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
-        else:
-            raise NotImplementedError(
-                "mode[{}_model] is not implemented!".format(model_name))
-
-        ###################################################
-        # modify stride and dilation based on output_stride
-        self.dilation_cfg = [1] * len(self.cfg)
-        self.modify_bottle_params(output_stride=output_stride)
-        ###################################################
-
-        self.conv1 = ConvBNLayer(
-            in_c=3,
-            out_c=make_divisible(inplanes * scale),
+
+        self.conv = ConvBNLayer(
+            in_c=in_channels,
+            out_c=_make_divisible(inplanes * self.scale),
             filter_size=3,
             filter_size=3,
             stride=2,
             stride=2,
             padding=1,
             padding=1,
             num_groups=1,
             num_groups=1,
             if_act=True,
             if_act=True,
-            act="hard_swish")
-
-        self.block_list = []
-
-        inplanes = make_divisible(inplanes * scale)
-        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
-            ######################################
-            # add dilation rate
-            dilation_rate = self.dilation_cfg[i]
-            ######################################
-            self.block_list.append(
-                ResidualUnit(
-                    in_c=inplanes,
-                    mid_c=make_divisible(scale * exp),
-                    out_c=make_divisible(scale * c),
-                    filter_size=k,
-                    stride=s,
-                    dilation=dilation_rate,
-                    use_se=se,
-                    act=nl,
-                    name="conv" + str(i + 2)))
-            self.add_sublayer(
-                sublayer=self.block_list[-1], name="conv" + str(i + 2))
-            inplanes = make_divisible(scale * c)
-
-        self.pretrained = pretrained
+            act="hardswish")
+        self.blocks = nn.Sequential(*[
+            ResidualUnit(
+                in_c=_make_divisible(inplanes * self.scale if i == 0 else
+                                     self.cfg[i - 1][2] * self.scale),
+                mid_c=_make_divisible(self.scale * exp),
+                out_c=_make_divisible(self.scale * c),
+                filter_size=k,
+                stride=s,
+                use_se=se,
+                act=act,
+                dilation=td[0] if td else 1)
+            for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
+        ])
+
+        out_channels = [config[idx][2] for idx in self.out_index]
+        self.feat_channels = [
+            _make_divisible(self.scale * c) for c in out_channels
+        ]
+
+        self.init_res(stages_pattern)
         self.init_weight()
         self.init_weight()
 
 
-    def modify_bottle_params(self, output_stride=None):
-
-        if output_stride is not None and output_stride % 2 != 0:
-            raise ValueError("output stride must to be even number")
-        if output_stride is not None:
-            stride = 2
-            rate = 1
-            for i, _cfg in enumerate(self.cfg):
-                stride = stride * _cfg[-1]
-                if stride > output_stride:
-                    rate = rate * _cfg[-1]
-                    self.cfg[i][-1] = 1
+    def init_weight(self):
+        """Load the weights referenced by ``self.pretrained``, if one is set."""
+        if self.pretrained is None:
+            # No checkpoint configured; leave the parameters untouched.
+            return
+        utils.load_entire_model(self, self.pretrained)
+
+    def init_res(self, stages_pattern, return_patterns=None,
+                 return_stages=None):
+        """Validate the requested return stages against ``stages_pattern``.
+
+        Args:
+            stages_pattern (list): Stage patterns, indexed by stage number.
+            return_patterns (optional): Explicit patterns to return. Only its
+                truthiness is checked here.
+            return_stages (bool|int|list, optional): ``True`` selects every
+                stage. An int or a list of ints selects stages by index.
+
+        Indices outside ``[0, len(stages_pattern))`` are reported through
+        ``logger.warning`` and dropped. The resolved patterns are computed
+        but neither stored nor returned, so this method has no effect beyond
+        logging.
+        """
+        if return_patterns and return_stages:
+            # NOTE(review): the message says 'return_patterns' is ignored, but
+            # the code discards 'return_stages'. Confirm which is intended.
+            msg = "The 'return_patterns' would be ignored when 'return_stages' is set."
+            logger.warning(msg)
+            return_stages = None
+
+        if return_stages is True:
+            return_patterns = stages_pattern
+        # `type(...) is int` (not isinstance) keeps bool values out of this branch.
+        if type(return_stages) is int:
+            return_stages = [return_stages]
+        if isinstance(return_stages, list):
+            num_stages = len(stages_pattern)
+            # Valid indices are 0 .. num_stages - 1. The previous
+            # `max(...) > len(...)` check let an index equal to num_stages
+            # through to an IndexError, and max()/min() failed on an empty list.
+            valid_stages = [
+                val for val in return_stages if 0 <= val < num_stages
+            ]
+            if len(valid_stages) != len(return_stages):
+                msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
+                logger.warning(msg)
+            return_patterns = [stages_pattern[i] for i in valid_stages]
 
 
-                self.dilation_cfg[i] = rate
+    def forward(self, x):
+        x = self.conv(x)
 
 
-    def forward(self, inputs, label=None):
-        x = self.conv1(inputs)
-        # A feature list saves each downsampling feature.
         feat_list = []
         feat_list = []
-        for i, block in enumerate(self.block_list):
+        for idx, block in enumerate(self.blocks):
             x = block(x)
             x = block(x)
-            if i in self.out_indices:
+            if idx in self.out_index:
                 feat_list.append(x)
                 feat_list.append(x)
 
 
         return feat_list
         return feat_list
 
 
-    def init_weight(self):
-        if self.pretrained is not None:
-            utils.load_pretrained_model(self, self.pretrained)
-
 
 
 class ConvBNLayer(nn.Layer):
 class ConvBNLayer(nn.Layer):
     def __init__(self,
     def __init__(self,
@@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
                  filter_size,
                  filter_size,
                  stride,
                  stride,
                  padding,
                  padding,
-                 dilation=1,
                  num_groups=1,
                  num_groups=1,
                  if_act=True,
                  if_act=True,
-                 act=None):
-        super(ConvBNLayer, self).__init__()
-        self.if_act = if_act
-        self.act = act
+                 act=None,
+                 dilation=1):
+        super().__init__()
 
 
-        self.conv = nn.Conv2D(
+        self.conv = Conv2D(
             in_channels=in_c,
             in_channels=in_c,
             out_channels=out_c,
             out_channels=out_c,
             kernel_size=filter_size,
             kernel_size=filter_size,
             stride=stride,
             stride=stride,
             padding=padding,
             padding=padding,
-            dilation=dilation,
             groups=num_groups,
             groups=num_groups,
-            bias_attr=False)
-        self.bn = layers.SyncBatchNorm(
-            num_features=out_c,
-            weight_attr=paddle.ParamAttr(
-                regularizer=paddle.regularizer.L2Decay(0.0)),
-            bias_attr=paddle.ParamAttr(
-                regularizer=paddle.regularizer.L2Decay(0.0)))
-        self._act_op = layers.Activation(act='hardswish')
+            bias_attr=False,
+            dilation=dilation)
+        self.bn = BatchNorm(
+            num_channels=out_c,
+            act=None,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.if_act = if_act
+        self.act = _create_act(act)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self.conv(x)
         x = self.conv(x)
         x = self.bn(x)
         x = self.bn(x)
         if self.if_act:
         if self.if_act:
-            x = self._act_op(x)
+            x = self.act(x)
         return x
         return x
 
 
 
 
@@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
                  filter_size,
                  filter_size,
                  stride,
                  stride,
                  use_se,
                  use_se,
-                 dilation=1,
                  act=None,
                  act=None,
-                 name=''):
-        super(ResidualUnit, self).__init__()
+                 dilation=1):
+        super().__init__()
         self.if_shortcut = stride == 1 and in_c == out_c
         self.if_shortcut = stride == 1 and in_c == out_c
         self.if_se = use_se
         self.if_se = use_se
 
 
@@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
             padding=0,
             padding=0,
             if_act=True,
             if_act=True,
             act=act)
             act=act)
-
         self.bottleneck_conv = ConvBNLayer(
         self.bottleneck_conv = ConvBNLayer(
             in_c=mid_c,
             in_c=mid_c,
             out_c=mid_c,
             out_c=mid_c,
             filter_size=filter_size,
             filter_size=filter_size,
             stride=stride,
             stride=stride,
-            padding='same',
-            dilation=dilation,
+            padding=int((filter_size - 1) // 2) * dilation,
             num_groups=mid_c,
             num_groups=mid_c,
             if_act=True,
             if_act=True,
-            act=act)
+            act=act,
+            dilation=dilation)
         if self.if_se:
         if self.if_se:
-            self.mid_se = SEModule(mid_c, name=name + "_se")
+            self.mid_se = SEModule(mid_c)
         self.linear_conv = ConvBNLayer(
         self.linear_conv = ConvBNLayer(
             in_c=mid_c,
             in_c=mid_c,
             out_c=out_c,
             out_c=out_c,
@@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
             padding=0,
             padding=0,
             if_act=False,
             if_act=False,
             act=None)
             act=None)
-        self.dilation = dilation
 
 
-    def forward(self, inputs):
-        x = self.expand_conv(inputs)
+    def forward(self, x):
+        identity = x
+        x = self.expand_conv(x)
         x = self.bottleneck_conv(x)
         x = self.bottleneck_conv(x)
         if self.if_se:
         if self.if_se:
             x = self.mid_se(x)
             x = self.mid_se(x)
         x = self.linear_conv(x)
         x = self.linear_conv(x)
         if self.if_shortcut:
         if self.if_shortcut:
-            x = inputs + x
+            x = paddle.add(identity, x)
         return x
         return x
 
 
 
 
+# nn.Hardsigmoid cannot pass "slope" and "offset" through to nn.functional.hardsigmoid
+class Hardsigmoid(nn.Layer):
+    def __init__(self, slope=0.2, offset=0.5):
+        super().__init__()
+        self.slope = slope
+        self.offset = offset
+
+    def forward(self, x):
+        return nn.functional.hardsigmoid(
+            x, slope=self.slope, offset=self.offset)
+
+
 class SEModule(nn.Layer):
 class SEModule(nn.Layer):
-    def __init__(self, channel, reduction=4, name=""):
-        super(SEModule, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2D(1)
-        self.conv1 = nn.Conv2D(
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
             in_channels=channel,
             in_channels=channel,
             out_channels=channel // reduction,
             out_channels=channel // reduction,
             kernel_size=1,
             kernel_size=1,
             stride=1,
             stride=1,
             padding=0)
             padding=0)
-        self.conv2 = nn.Conv2D(
+        self.relu = nn.ReLU()
+        self.conv2 = Conv2D(
             in_channels=channel // reduction,
             in_channels=channel // reduction,
             out_channels=channel,
             out_channels=channel,
             kernel_size=1,
             kernel_size=1,
             stride=1,
             stride=1,
             padding=0)
             padding=0)
+        self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
 
 
-    def forward(self, inputs):
-        outputs = self.avg_pool(inputs)
-        outputs = self.conv1(outputs)
-        outputs = F.relu(outputs)
-        outputs = self.conv2(outputs)
-        outputs = F.hardsigmoid(outputs)
-        return paddle.multiply(x=inputs, y=outputs)
+    def forward(self, x):
+        identity = x
+        x = self.avg_pool(x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.hardsigmoid(x)
+        return paddle.multiply(x=identity, y=x)
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_35(**kwargs):
 def MobileNetV3_small_x0_35(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_5(**kwargs):
 def MobileNetV3_small_x0_5(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_75(**kwargs):
 def MobileNetV3_small_x0_75(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
 @manager.BACKBONES.add_component
 @manager.BACKBONES.add_component
 def MobileNetV3_small_x1_0(**kwargs):
 def MobileNetV3_small_x1_0(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x1_25(**kwargs):
 def MobileNetV3_small_x1_25(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_35(**kwargs):
 def MobileNetV3_large_x0_35(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_5(**kwargs):
 def MobileNetV3_large_x0_5(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_75(**kwargs):
 def MobileNetV3_large_x0_75(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
 @manager.BACKBONES.add_component
 @manager.BACKBONES.add_component
 def MobileNetV3_large_x1_0(**kwargs):
 def MobileNetV3_large_x1_0(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x1_25(**kwargs):
 def MobileNetV3_large_x1_25(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def MobileNetV3_large_x1_0_os8(**kwargs):
+    model = MobileNetV3(
+        config=NET_CONFIG["large_os8"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def MobileNetV3_small_x1_0_os8(**kwargs):
+    model = MobileNetV3(
+        config=NET_CONFIG["small_os8"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model

+ 4 - 3
paddlers/models/ppseg/models/backbones/resnet_vd.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
         layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
         layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
         output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
         output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
         multi_grid (tuple|list, optional): The grid of stage4. Default: (1, 1, 1).
         multi_grid (tuple|list, optional): The grid of stage4. Default: (1, 1, 1).
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path of pretrained model.
         pretrained (str, optional): The path of pretrained model.
 
 
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 input_channel=3,
                  layers=50,
                  layers=50,
                  output_stride=8,
                  output_stride=8,
                  multi_grid=(1, 1, 1),
                  multi_grid=(1, 1, 1),
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  data_format='NCHW'):
                  data_format='NCHW'):
         super(ResNet_vd, self).__init__()
         super(ResNet_vd, self).__init__()
@@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
             dilation_dict = {3: 2}
             dilation_dict = {3: 2}
 
 
         self.conv1_1 = ConvBNLayer(
         self.conv1_1 = ConvBNLayer(
-            in_channels=input_channel,
+            in_channels=in_channels,
             out_channels=32,
             out_channels=32,
             kernel_size=3,
             kernel_size=3,
             stride=2,
             stride=2,

+ 315 - 0
paddlers/models/ppseg/models/backbones/shufflenetv2.py

@@ -0,0 +1,315 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr, reshape, transpose, concat, split
+from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
+from paddle.nn.initializer import KaimingNormal
+from paddle.nn.functional import swish
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.utils import utils, logger
+
+__all__ = [
+    'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
+    'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
+    'ShuffleNetV2_swish'
+]
+
+
+def channel_shuffle(x, groups):
+    x_shape = paddle.shape(x)
+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
+    num_channels = x.shape[1]
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = reshape(
+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
+
+    # transpose
+    x = transpose(x=x, perm=[0, 2, 1, 3, 4])
+
+    # flatten
+    x = reshape(x=x, shape=[batch_size, num_channels, height, width])
+
+    return x
+
+
+class ConvBNLayer(Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            groups=1,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            out_channels,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            act=act,
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class InvertedResidual(Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu",
+                 name=None):
+        super(InvertedResidual, self).__init__()
+        self._conv_pw = ConvBNLayer(
+            in_channels=in_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
+        x1, x2 = split(
+            inputs,
+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
+            axis=1)
+        x2 = self._conv_pw(x2)
+        x2 = self._conv_dw(x2)
+        x2 = self._conv_linear(x2)
+        out = concat([x1, x2], axis=1)
+        return channel_shuffle(out, 2)
+
+
+class InvertedResidualDS(Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu",
+                 name=None):
+        super(InvertedResidualDS, self).__init__()
+
+        # branch1
+        self._conv_dw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=in_channels,
+            act=None,
+            name='stage_' + name + '_conv4')
+        self._conv_linear_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv5')
+        # branch2
+        self._conv_pw_2 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
+        x1 = self._conv_dw_1(inputs)
+        x1 = self._conv_linear_1(x1)
+        x2 = self._conv_pw_2(inputs)
+        x2 = self._conv_dw_2(x2)
+        x2 = self._conv_linear_2(x2)
+        out = concat([x1, x2], axis=1)
+
+        return channel_shuffle(out, 2)
+
+
+class ShuffleNet(Layer):
+    def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
+        super(ShuffleNet, self).__init__()
+        self.scale = scale
+        self.pretrained = pretrained
+        stage_repeats = [4, 8, 4]
+
+        if scale == 0.25:
+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
+        elif scale == 0.33:
+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
+        elif scale == 0.5:
+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif scale == 1.0:
+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif scale == 1.5:
+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif scale == 2.0:
+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+        else:
+            raise NotImplementedError("This scale size:[" + str(scale) +
+                                      "] is not implemented!")
+
+        self.out_index = [3, 11, 15]
+        self.feat_channels = stage_out_channels[1:5]
+
+        # 1. conv1
+        self._conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=stage_out_channels[1],
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            act=act,
+            name='stage1_conv')
+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # 2. bottleneck sequences
+        self._block_list = []
+        for stage_id, num_repeat in enumerate(stage_repeats):
+            for i in range(num_repeat):
+                if i == 0:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidualDS(
+                            in_channels=stage_out_channels[stage_id + 1],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=2,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                else:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidual(
+                            in_channels=stage_out_channels[stage_id + 2],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=1,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                self._block_list.append(block)
+
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, inputs):
+        feat_list = []
+
+        y = self._conv1(inputs)
+        y = self._max_pool(y)
+        feat_list.append(y)
+
+        for idx, inv in enumerate(self._block_list):
+            y = inv(y)
+            if idx in self.out_index:
+                feat_list.append(y)
+        return feat_list
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_25(**kwargs):
+    model = ShuffleNet(scale=0.25, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_33(**kwargs):
+    model = ShuffleNet(scale=0.33, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_5(**kwargs):
+    model = ShuffleNet(scale=0.5, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x1_0(**kwargs):
+    model = ShuffleNet(scale=1.0, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x1_5(**kwargs):
+    model = ShuffleNet(scale=1.5, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x2_0(**kwargs):
+    model = ShuffleNet(scale=2.0, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_swish(**kwargs):
+    model = ShuffleNet(scale=1.0, act="swish", **kwargs)
+    return model

+ 117 - 63
paddlers/models/ppseg/models/backbones/stdcnet.py

@@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
         layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Default: [4, 5, 3].
         layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Default: [4, 5, 3].
         block_num(int,optional): block_num of features block. Default: 4.
         block_num(int,optional): block_num of features block. Default: 4.
         type(str,optional): feature fusion method "cat"/"add". Default: "cat".
         type(str,optional): feature fusion method "cat"/"add". Default: "cat".
-        num_classes(int, optional): class number for image classification. Default: 1000.
-        dropout(float,optional): dropout ratio. if >0,use dropout ratio.  Default: 0.20.
-        use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False.
+        relative_lr(float,optional): parameters here receive a different learning rate when updating. The effective
+            learning rate is the product of relative_lr and the global learning rate. Default: 1.0.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained(str, optional): the path of pretrained model.
         pretrained(str, optional): the path of pretrained model.
     """
     """
 
 
@@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
                  layers=[4, 5, 3],
                  layers=[4, 5, 3],
                  block_num=4,
                  block_num=4,
                  type="cat",
                  type="cat",
-                 num_classes=1000,
-                 dropout=0.20,
-                 use_conv_last=False,
+                 relative_lr=1.0,
+                 in_channels=3,
                  pretrained=None):
                  pretrained=None):
         super(STDCNet, self).__init__()
         super(STDCNet, self).__init__()
         if type == "cat":
         if type == "cat":
             block = CatBottleneck
             block = CatBottleneck
         elif type == "add":
         elif type == "add":
             block = AddBottleneck
             block = AddBottleneck
-        self.use_conv_last = use_conv_last
-        self.features = self._make_layers(base, layers, block_num, block)
-        self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
-
-        if (layers == [4, 5, 3]):  #stdc1446
-            self.x2 = nn.Sequential(self.features[:1])
-            self.x4 = nn.Sequential(self.features[1:2])
-            self.x8 = nn.Sequential(self.features[2:6])
-            self.x16 = nn.Sequential(self.features[6:11])
-            self.x32 = nn.Sequential(self.features[11:])
-        elif (layers == [2, 2, 2]):  #stdc813
-            self.x2 = nn.Sequential(self.features[:1])
-            self.x4 = nn.Sequential(self.features[1:2])
-            self.x8 = nn.Sequential(self.features[2:4])
-            self.x16 = nn.Sequential(self.features[4:6])
-            self.x32 = nn.Sequential(self.features[6:])
-        else:
-            raise NotImplementedError(
-                "model with layers:{} is not implemented!".format(layers))
+        self.layers = layers
+        self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
+        self.features = self._make_layers(in_channels, base, layers, block_num,
+                                          block, relative_lr)
 
 
         self.pretrained = pretrained
         self.pretrained = pretrained
         self.init_weight()
         self.init_weight()
@@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
         """
         """
         Forward function for feature extraction.
         Forward function for feature extraction.
         """
         """
-        feat2 = self.x2(x)
-        feat4 = self.x4(feat2)
-        feat8 = self.x8(feat4)
-        feat16 = self.x16(feat8)
-        feat32 = self.x32(feat16)
-        if self.use_conv_last:
-            feat32 = self.conv_last(feat32)
-        return feat2, feat4, feat8, feat16, feat32
-
-    def _make_layers(self, base, layers, block_num, block):
+        out_feats = []
+
+        x = self.features[0](x)
+        out_feats.append(x)
+        x = self.features[1](x)
+        out_feats.append(x)
+
+        idx = [[2, 2 + self.layers[0]],
+               [2 + self.layers[0], 2 + sum(self.layers[0:2])],
+               [2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
+        for start_idx, end_idx in idx:
+            for i in range(start_idx, end_idx):
+                x = self.features[i](x)
+            out_feats.append(x)
+
+        return out_feats
+
+    def _make_layers(self, in_channels, base, layers, block_num, block,
+                     relative_lr):
         features = []
         features = []
-        features += [ConvBNRelu(3, base // 2, 3, 2)]
-        features += [ConvBNRelu(base // 2, base, 3, 2)]
+        features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
+        features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
 
 
         for i, layer in enumerate(layers):
         for i, layer in enumerate(layers):
             for j in range(layer):
             for j in range(layer):
                 if i == 0 and j == 0:
                 if i == 0 and j == 0:
-                    features.append(block(base, base * 4, block_num, 2))
+                    features.append(
+                        block(base, base * 4, block_num, 2, relative_lr))
                 elif j == 0:
                 elif j == 0:
                     features.append(
                     features.append(
                         block(base * int(math.pow(2, i + 1)), base * int(
                         block(base * int(math.pow(2, i + 1)), base * int(
-                            math.pow(2, i + 2)), block_num, 2))
+                            math.pow(2, i + 2)), block_num, 2, relative_lr))
                 else:
                 else:
                     features.append(
                     features.append(
                         block(base * int(math.pow(2, i + 2)), base * int(
                         block(base * int(math.pow(2, i + 2)), base * int(
-                            math.pow(2, i + 2)), block_num, 1))
+                            math.pow(2, i + 2)), block_num, 1, relative_lr))
 
 
         return nn.Sequential(*features)
         return nn.Sequential(*features)
 
 
@@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
 
 
 
 
 class ConvBNRelu(nn.Layer):
 class ConvBNRelu(nn.Layer):
-    def __init__(self, in_planes, out_planes, kernel=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 kernel=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(ConvBNRelu, self).__init__()
         super(ConvBNRelu, self).__init__()
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         self.conv = nn.Conv2D(
         self.conv = nn.Conv2D(
             in_planes,
             in_planes,
             out_planes,
             out_planes,
             kernel_size=kernel,
             kernel_size=kernel,
             stride=stride,
             stride=stride,
             padding=kernel // 2,
             padding=kernel // 2,
+            weight_attr=param_attr,
             bias_attr=False)
             bias_attr=False)
-        self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
+        self.bn = nn.BatchNorm2D(
+            out_planes, weight_attr=param_attr, bias_attr=param_attr)
         self.relu = nn.ReLU()
         self.relu = nn.ReLU()
 
 
     def forward(self, x):
     def forward(self, x):
@@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
 
 
 
 
 class AddBottleneck(nn.Layer):
 class AddBottleneck(nn.Layer):
-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 block_num=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(AddBottleneck, self).__init__()
         super(AddBottleneck, self).__init__()
         assert block_num > 1, "block number should be larger than 1."
         assert block_num > 1, "block number should be larger than 1."
         self.conv_list = nn.LayerList()
         self.conv_list = nn.LayerList()
         self.stride = stride
         self.stride = stride
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         if stride == 2:
         if stride == 2:
             self.avd_layer = nn.Sequential(
             self.avd_layer = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
@@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=out_planes // 2,
                     groups=out_planes // 2,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(out_planes // 2), )
+                nn.BatchNorm2D(
+                    out_planes // 2,
+                    weight_attr=param_attr,
+                    bias_attr=param_attr), )
             self.skip = nn.Sequential(
             self.skip = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
                     in_planes,
                     in_planes,
@@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=in_planes,
                     groups=in_planes,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(in_planes),
+                nn.BatchNorm2D(
+                    in_planes, weight_attr=param_attr, bias_attr=param_attr),
                 nn.Conv2D(
                 nn.Conv2D(
-                    in_planes, out_planes, kernel_size=1, bias_attr=False),
-                nn.BatchNorm2D(out_planes), )
+                    in_planes,
+                    out_planes,
+                    kernel_size=1,
+                    bias_attr=False,
+                    weight_attr=param_attr),
+                nn.BatchNorm2D(
+                    out_planes, weight_attr=param_attr, bias_attr=param_attr), )
             stride = 1
             stride = 1
 
 
         for idx in range(block_num):
         for idx in range(block_num):
             if idx == 0:
             if idx == 0:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        in_planes, out_planes // 2, kernel=1))
+                        in_planes,
+                        out_planes // 2,
+                        kernel=1,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num == 2:
             elif idx == 1 and block_num == 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 2, stride=stride))
+                        out_planes // 2,
+                        out_planes // 2,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num > 2:
             elif idx == 1 and block_num > 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 4, stride=stride))
+                        out_planes // 2,
+                        out_planes // 4,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx < block_num - 1:
             elif idx < block_num - 1:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx + 1))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx + 1)),
+                        relative_lr=relative_lr))
             else:
             else:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx)),
+                        relative_lr=relative_lr))
 
 
     def forward(self, x):
     def forward(self, x):
         out_list = []
         out_list = []
@@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
 
 
 
 
 class CatBottleneck(nn.Layer):
 class CatBottleneck(nn.Layer):
-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 block_num=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(CatBottleneck, self).__init__()
         super(CatBottleneck, self).__init__()
         assert block_num > 1, "block number should be larger than 1."
         assert block_num > 1, "block number should be larger than 1."
         self.conv_list = nn.LayerList()
         self.conv_list = nn.LayerList()
         self.stride = stride
         self.stride = stride
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         if stride == 2:
         if stride == 2:
             self.avd_layer = nn.Sequential(
             self.avd_layer = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
@@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=out_planes // 2,
                     groups=out_planes // 2,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(out_planes // 2), )
+                nn.BatchNorm2D(
+                    out_planes // 2,
+                    weight_attr=param_attr,
+                    bias_attr=param_attr), )
             self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
             self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
             stride = 1
             stride = 1
 
 
@@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
             if idx == 0:
             if idx == 0:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        in_planes, out_planes // 2, kernel=1))
+                        in_planes,
+                        out_planes // 2,
+                        kernel=1,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num == 2:
             elif idx == 1 and block_num == 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 2, stride=stride))
+                        out_planes // 2,
+                        out_planes // 2,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num > 2:
             elif idx == 1 and block_num > 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 4, stride=stride))
+                        out_planes // 2,
+                        out_planes // 4,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx < block_num - 1:
             elif idx < block_num - 1:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx + 1))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx + 1)),
+                        relative_lr=relative_lr))
             else:
             else:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx)),
+                        relative_lr=relative_lr))
 
 
     def forward(self, x):
     def forward(self, x):
         out_list = []
         out_list = []

+ 4 - 4
paddlers/models/ppseg/models/backbones/swin_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
     Args:
     Args:
         pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
         pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
         patch_size (int | tuple(int)): Patch size. Default: 4.
         patch_size (int | tuple(int)): Patch size. Default: 4.
-        in_chans (int): Number of input image channels. Default: 3.
+        in_channels (int): Number of input image channels. Default: 3.
         embed_dim (int): Number of linear projection output channels. Default: 96.
         embed_dim (int): Number of linear projection output channels. Default: 96.
         depths (tuple[int]): Depths of each Swin Transformer stage.
         depths (tuple[int]): Depths of each Swin Transformer stage.
         num_heads (tuple[int]): Number of attention head of each stage.
         num_heads (tuple[int]): Number of attention head of each stage.
@@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  pretrain_img_size=224,
                  pretrain_img_size=224,
                  patch_size=4,
                  patch_size=4,
-                 in_chans=3,
+                 in_channels=3,
                  embed_dim=96,
                  embed_dim=96,
                  depths=[2, 2, 6, 2],
                  depths=[2, 2, 6, 2],
                  num_heads=[3, 6, 12, 24],
                  num_heads=[3, 6, 12, 24],
@@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
         # split image into non-overlapping patches
         # split image into non-overlapping patches
         self.patch_embed = PatchEmbed(
         self.patch_embed = PatchEmbed(
             patch_size=patch_size,
             patch_size=patch_size,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dim,
             embed_dim=embed_dim,
             norm_layer=norm_layer if self.patch_norm else None)
             norm_layer=norm_layer if self.patch_norm else None)
 
 

+ 716 - 0
paddlers/models/ppseg/models/backbones/top_transformer.py

@@ -0,0 +1,716 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
+"""
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg import utils
+from paddlers.models.ppseg.models.backbones.transformer_utils import Identity, DropPath
+
+__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
+
+
+def make_divisible(val, divisor, min_value=None):
+    """
+    Round `val` to a multiple of `divisor` (half rounds up), never going below
+    `min_value`, and never ending up more than 10% below `val`.
+
+    Adapted from the TensorFlow MobileNet implementation:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+
+    Args:
+        val (int|float): The value to round.
+        divisor (int): The result is a multiple of this value.
+        min_value (int, optional): Lower bound of the result. When None, `divisor`
+            is used as the lower bound. Default: None.
+
+    Returns:
+        The rounded value.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    rounded = int(val + divisor / 2) // divisor * divisor
+    result = max(lower_bound, rounded)
+    # Rounding down must not lose more than 10% of `val`; step up once if it did.
+    return result + divisor if result < 0.9 * val else result
+
+
+class HSigmoid(nn.Layer):
+    """
+    Hard sigmoid activation, computed as ReLU6(x + 3) / 6.
+
+    Args:
+        inplace (bool, optional): Accepted but not used by this layer. Default: True.
+    """
+
+    def __init__(self, inplace=True):
+        super().__init__()
+        self.relu = nn.ReLU6()
+
+    def forward(self, x):
+        shifted = x + 3
+        return self.relu(shifted) / 6
+
+
+class Conv2DBN(nn.Layer):
+    """
+    A bias-free 2D convolution followed by batch normalization (no activation).
+
+    Args:
+        in_channels (int): Number of input channels of the convolution.
+        out_channels (int): Number of output channels of the convolution, also the
+            number of features of the batch norm.
+        ks (int, optional): Kernel size of the convolution. Default: 1.
+        stride (int, optional): Stride of the convolution. Default: 1.
+        pad (int, optional): Padding of the convolution. Default: 0.
+        dilation (int, optional): Dilation of the convolution. Default: 1.
+        groups (int, optional): Number of groups of the convolution. Default: 1.
+        bn_weight_init (float, optional): Constant used to initialize the batch
+            norm weight. Default: 1.
+        lr_mult (float, optional): Value used as `learning_rate` in the ParamAttr
+            of the conv weight and of the batch norm weight and bias. Default: 1.0.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ks=1,
+                 stride=1,
+                 pad=0,
+                 dilation=1,
+                 groups=1,
+                 bn_weight_init=1,
+                 lr_mult=1.0):
+        super().__init__()
+        conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.c = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=ks,
+            stride=stride,
+            padding=pad,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+        # The batch norm weight starts at `bn_weight_init` and the bias at 0.
+        bn_weight_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(bn_weight_init),
+            learning_rate=lr_mult)
+        bn_bias_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
+        self.bn = nn.BatchNorm2D(
+            out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
+
+    def forward(self, inputs):
+        # Convolution first, then batch normalization.
+        out = self.c(inputs)
+        out = self.bn(out)
+        return out
+
+
+class ConvBNAct(nn.Layer):
+    """
+    Convolution, then an optional normalization layer, then an optional activation.
+
+    Args:
+        in_channels (int): Number of input channels of the convolution.
+        out_channels (int): Number of output channels of the convolution.
+        kernel_size (int, optional): Kernel size of the convolution. Default: 1.
+        stride (int, optional): Stride of the convolution. Default: 1.
+        padding (int, optional): Padding of the convolution. Default: 0.
+        groups (int, optional): Number of groups of the convolution. Default: 1.
+        norm (callable, optional): Normalization layer class, called as
+            `norm(out_channels, weight_attr=..., bias_attr=...)`. When None, an
+            Identity layer is used instead. Default: nn.BatchNorm2D.
+        act (callable, optional): Activation layer class, called without arguments.
+            When None, an Identity layer is used instead. Default: None.
+        bias_attr (bool, optional): When truthy, the convolution has a bias that
+            uses the same ParamAttr as its weight; otherwise it has no bias.
+            Default: False.
+        lr_mult (float, optional): Value used as `learning_rate` in the ParamAttr
+            shared by the convolution and the normalization layer. Default: 1.0.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 groups=1,
+                 norm=nn.BatchNorm2D,
+                 act=None,
+                 bias_attr=False,
+                 lr_mult=1.0):
+        super(ConvBNAct, self).__init__()
+        # One ParamAttr object is reused for the conv and the norm parameters.
+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=param_attr,
+            bias_attr=param_attr if bias_attr else False)
+        # Note: `act` is registered before `bn`, although `bn` runs first in forward.
+        self.act = act() if act is not None else Identity()
+        self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \
+            if norm is not None else Identity()
+
+    def forward(self, x):
+        # Order of application: conv -> norm -> activation.
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.act(x)
+        return x
+
+
+class MLP(nn.Layer):
+    """
+    Convolutional feed-forward block: Conv2DBN -> depthwise 3x3 conv -> activation
+    -> dropout -> Conv2DBN -> dropout.
+
+    Args:
+        in_features (int): Number of input channels.
+        hidden_features (int, optional): Number of hidden channels. Falls back to
+            `in_features` when None (or 0). Default: None.
+        out_features (int, optional): Number of output channels. Falls back to
+            `in_features` when None (or 0). Default: None.
+        act_layer (callable, optional): Activation layer class. Default: nn.ReLU.
+        drop (float, optional): Dropout probability. Default: 0.
+        lr_mult (float, optional): Learning rate multiplier given to the parameters
+            of all sublayers. Default: 1.0.
+    """
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.ReLU,
+                 drop=0.,
+                 lr_mult=1.0):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        # Depthwise convolution (groups == channels); the positional 3, 1, 1 are
+        # kernel size, stride and padding. Unlike Conv2DBN, this conv has a bias.
+        self.dwconv = nn.Conv2D(
+            hidden_features,
+            hidden_features,
+            3,
+            1,
+            1,
+            groups=hidden_features,
+            weight_attr=param_attr,
+            bias_attr=param_attr)
+        self.act = act_layer()
+        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.dwconv(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        # The same dropout layer is applied a second time after the projection.
+        x = self.drop(x)
+        return x
+
+
+class InvertedResidual(nn.Layer):
+    """
+    Inverted residual block: optional 1x1 expansion, depthwise convolution and
+    1x1 projection, each built from Conv2DBN, with a skip connection when the
+    input and output shapes match.
+
+    Args:
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        kernel_size (int): Kernel size of the depthwise convolution.
+        stride (int): Stride of the depthwise convolution. Must be 1 or 2.
+        expand_ratio (int|float): The hidden width is
+            `int(round(in_channels * expand_ratio))`. When it equals 1, the
+            expansion convolution is omitted.
+        activations (callable, optional): Activation layer class. nn.ReLU is used
+            when None. Default: None.
+        lr_mult (float, optional): Learning rate multiplier given to every
+            Conv2DBN in the block. Default: 1.0.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 expand_ratio,
+                 activations=None,
+                 lr_mult=1.0):
+        super(InvertedResidual, self).__init__()
+        assert stride in [1, 2], "The stride should be 1 or 2."
+
+        if activations is None:
+            activations = nn.ReLU
+
+        hidden_dim = int(round(in_channels * expand_ratio))
+        # The skip connection is only valid when the block keeps resolution and width.
+        self.use_res_connect = stride == 1 and in_channels == out_channels
+
+        layers = []
+        if expand_ratio != 1:
+            # 1x1 expansion to the hidden width.
+            layers.append(
+                Conv2DBN(
+                    in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
+            layers.append(activations())
+        # Depthwise conv (groups == hidden_dim), activation, then 1x1 projection
+        # without a trailing activation.
+        layers.extend([
+            Conv2DBN(
+                hidden_dim,
+                hidden_dim,
+                ks=kernel_size,
+                stride=stride,
+                pad=kernel_size // 2,
+                groups=hidden_dim,
+                lr_mult=lr_mult), activations(), Conv2DBN(
+                    hidden_dim, out_channels, ks=1, lr_mult=lr_mult)
+        ])
+        self.conv = nn.Sequential(*layers)
+        self.out_channels = out_channels
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+class TokenPyramidModule(nn.Layer):
+    """
+    A stem convolution followed by a stack of InvertedResidual layers. The forward
+    pass returns the outputs of the layers selected by `out_indices`.
+
+    Args:
+        cfgs (list): One row `[k, t, c, s]` per layer: kernel size, expand ratio,
+            base output channels and stride.
+        out_indices (list[int]): 0-based indices into `cfgs` of the layers whose
+            outputs are returned.
+        in_channels (int, optional): Number of channels of the input. Default: 3.
+        inp_channel (int, optional): Number of output channels of the stem.
+            Default: 16.
+        activation (callable, optional): Activation layer class. Default: nn.ReLU.
+        width_mult (float, optional): Multiplier applied to `c` of every row before
+            it is rounded to a multiple of 8. Default: 1.
+        lr_mult (float, optional): Learning rate multiplier given to all
+            sublayers. Default: 1.
+    """
+    def __init__(self,
+                 cfgs,
+                 out_indices,
+                 in_channels=3,
+                 inp_channel=16,
+                 activation=nn.ReLU,
+                 width_mult=1.,
+                 lr_mult=1.):
+        super().__init__()
+        self.out_indices = out_indices
+
+        # 3x3 convolution with stride 2 and padding 1, followed by the activation.
+        self.stem = nn.Sequential(
+            Conv2DBN(
+                in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
+            activation())
+
+        # Sublayers are registered under the names 'layer1', 'layer2', ...; only
+        # the names are kept in this plain list, in execution order.
+        self.layers = []
+        for i, (k, t, c, s) in enumerate(cfgs):
+            output_channel = make_divisible(c * width_mult, 8)
+            # The hidden width is derived inside InvertedResidual from `t`, so no
+            # expansion size needs to be computed here.
+            layer_name = 'layer{}'.format(i + 1)
+            layer = InvertedResidual(
+                inp_channel,
+                output_channel,
+                kernel_size=k,
+                stride=s,
+                expand_ratio=t,
+                activations=activation,
+                lr_mult=lr_mult)
+            self.add_sublayer(layer_name, layer)
+            self.layers.append(layer_name)
+            inp_channel = output_channel
+
+    def forward(self, x):
+        outs = []
+        x = self.stem(x)
+        for i, layer_name in enumerate(self.layers):
+            layer = getattr(self, layer_name)
+            x = layer(x)
+            if i in self.out_indices:
+                outs.append(x)
+        return outs
+
+
+class Attention(nn.Layer):
+    """
+    Multi-head self-attention over the spatial positions of a feature map, with
+    query/key/value produced by 1x1 Conv2DBN layers.
+
+    Args:
+        dim (int): Number of input and output channels.
+        key_dim (int): Number of query/key channels per head.
+        num_heads (int): Number of attention heads.
+        attn_ratio (int|float, optional): The number of value channels per head is
+            `int(attn_ratio * key_dim)`. Default: 4.
+        activation (callable): Activation layer class used in the output
+            projection. It is called unconditionally, so the default of None
+            cannot be used as-is. Default: None.
+        lr_mult (float, optional): Learning rate multiplier given to every
+            Conv2DBN. Default: 1.0.
+    """
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 attn_ratio=4,
+                 activation=None,
+                 lr_mult=1.0):
+        super().__init__()
+        self.num_heads = num_heads
+        # NOTE(review): `scale` is stored but not applied in forward() - confirm
+        # this is intended before relying on it.
+        self.scale = key_dim**-0.5
+        self.key_dim = key_dim
+        # Total query/key channels across all heads.
+        self.nh_kd = nh_kd = key_dim * num_heads
+        # Value channels per head (d) and across all heads (dh).
+        self.d = int(attn_ratio * key_dim)
+        self.dh = int(attn_ratio * key_dim) * num_heads
+        self.attn_ratio = attn_ratio
+
+        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
+
+        # Output projection; its batch norm weight is initialized to 0.
+        self.proj = nn.Sequential(
+            activation(),
+            Conv2DBN(
+                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
+
+    def forward(self, x):
+        # Axes 2 and 3 of the input are taken as height and width.
+        x_shape = paddle.shape(x)
+        H, W = x_shape[2], x_shape[3]
+
+        # Split channels into heads and flatten the spatial axes; the 0 in the
+        # target shape keeps the size of the first axis of the input.
+        # qq: [., heads, H*W, key_dim], kk: [., heads, key_dim, H*W],
+        # vv: [., heads, H*W, d].
+        qq = self.to_q(x).reshape(
+            [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
+        kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
+        vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
+            [0, 1, 3, 2])
+
+        # Position-to-position scores, normalized over the last axis.
+        attn = paddle.matmul(qq, kk)
+        attn = F.softmax(attn, axis=-1)
+
+        xx = paddle.matmul(attn, vv)
+
+        # Merge the heads back into channels and restore the spatial layout.
+        xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
+        xx = self.proj(xx)
+        return xx
+
+
+class Block(nn.Layer):
+    """
+    Transformer block: an Attention sublayer and an MLP sublayer, each wrapped in
+    a residual connection with drop path.
+
+    Args:
+        dim (int): Number of input and output channels.
+        key_dim (int): Number of query/key channels per head.
+        num_heads (int): Number of attention heads.
+        mlp_ratios (float, optional): The hidden width of the MLP is
+            `int(dim * mlp_ratios)`. Default: 4.
+        attn_ratio (float, optional): Passed to Attention as `attn_ratio`.
+            Default: 2.
+        drop (float, optional): Dropout probability used inside the MLP. Default: 0.
+        drop_path (float, optional): Drop path rate. When it is not greater than 0,
+            an Identity layer is used instead of DropPath. Default: 0.
+        act_layer (callable, optional): Activation layer class passed to Attention
+            and MLP. Default: nn.ReLU.
+        lr_mult (float, optional): Learning rate multiplier passed to Attention and
+            MLP. Default: 1.0.
+    """
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.ReLU,
+                 lr_mult=1.0):
+        super().__init__()
+        self.dim = dim
+        self.num_heads = num_heads
+        self.mlp_ratios = mlp_ratios
+
+        self.attn = Attention(
+            dim,
+            key_dim=key_dim,
+            num_heads=num_heads,
+            attn_ratio=attn_ratio,
+            activation=act_layer,
+            lr_mult=lr_mult)
+
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        mlp_hidden_dim = int(dim * mlp_ratios)
+        self.mlp = MLP(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop,
+                       lr_mult=lr_mult)
+
+    def forward(self, x):
+        # Attention branch with residual connection.
+        h = x
+        x = self.attn(x)
+        x = self.drop_path(x)
+        x = h + x
+
+        # MLP branch with residual connection; the same drop path layer is reused.
+        h = x
+        x = self.mlp(x)
+        x = self.drop_path(x)
+        x = x + h
+        return x
+
+
+class BasicLayer(nn.Layer):
+    """
+    A sequence of `block_num` transformer Blocks applied one after another.
+
+    Args:
+        block_num (int): Number of Blocks.
+        embedding_dim (int): Number of channels given to every Block as `dim`.
+        key_dim (int): Number of query/key channels per head.
+        num_heads (int): Number of attention heads.
+        mlp_ratios (float, optional): Passed to every Block. Default: 4.
+        attn_ratio (float, optional): Passed to every Block. Default: 2.
+        drop (float, optional): Dropout probability passed to every Block.
+            Default: 0.
+        attn_drop (float, optional): Accepted but not used by this layer.
+            Default: 0.
+        drop_path (float|list, optional): Drop path rate. When a list, the i-th
+            entry is used for the i-th Block; otherwise the same value is used for
+            all Blocks. Default: 0.
+        act_layer (callable, optional): Activation layer class passed to every
+            Block. Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed to every Block.
+            Default: 1.0.
+    """
+    def __init__(self,
+                 block_num,
+                 embedding_dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=None,
+                 lr_mult=1.0):
+        super().__init__()
+        self.block_num = block_num
+
+        self.transformer_blocks = nn.LayerList()
+        for i in range(self.block_num):
+            self.transformer_blocks.append(
+                Block(
+                    embedding_dim,
+                    key_dim=key_dim,
+                    num_heads=num_heads,
+                    mlp_ratios=mlp_ratios,
+                    attn_ratio=attn_ratio,
+                    drop=drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list) else drop_path,
+                    act_layer=act_layer,
+                    lr_mult=lr_mult))
+
+    def forward(self, x):
+        # Apply the blocks in order.
+        for i in range(self.block_num):
+            x = self.transformer_blocks[i](x)
+        return x
+
+
+class PyramidPoolAgg(nn.Layer):
+    # Average-pools every input feature map with its own kernel size and stride,
+    # then concatenates the pooled maps along axis 1.
+    #
+    # Args:
+    #     stride (int): Base of the pooling stride; the first input is pooled with
+    #         stride `stride ** len(inputs)`.
+    def __init__(self, stride):
+        super().__init__()
+        self.stride = stride
+        self.tmp = Identity()  # avoid the error of paddle.flops
+
+    def forward(self, inputs):
+        '''
+        # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
+        # so exporting the inference model will raise error.
+        _, _, H, W = inputs[-1].shape
+        H = (H - 1) // self.stride + 1
+        W = (W - 1) // self.stride + 1
+        return paddle.concat(
+            [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
+        '''
+        out = []
+        # The first input gets the largest kernel and stride; both are halved for
+        # each following input.
+        ks = 2**len(inputs)
+        stride = self.stride**len(inputs)
+        for x in inputs:
+            # `ks` and `stride` become floats after the first division below, hence
+            # the int() conversion.
+            x = F.avg_pool2d(x, int(ks), int(stride))
+            ks /= 2
+            stride /= 2
+            out.append(x)
+        out = paddle.concat(out, axis=1)
+        return out
+
+
+class InjectionMultiSum(nn.Layer):
+    """
+    Fuses a local feature map with a global one as
+    `local * hsigmoid(gate(global)) + embed(global)`, where the two terms derived
+    from the global map are resized to the spatial size of the local map.
+
+    Args:
+        in_channels (int): Number of input channels of all three 1x1 ConvBNAct
+            layers (they are applied to both `x_low` and `x_global`).
+        out_channels (int): Number of output channels.
+        activations (callable, optional): Accepted but not used by this layer.
+            Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed to the
+            ConvBNAct layers. Default: 1.0.
+    """
+    def __init__(self, in_channels, out_channels, activations=None,
+                 lr_mult=1.0):
+        super(InjectionMultiSum, self).__init__()
+
+        self.local_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_act = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_global):
+        # Target spatial size: axes 2 and onward of the local feature map.
+        xl_hw = paddle.shape(x_low)[2:]
+        local_feat = self.local_embedding(x_low)
+
+        # Gate computed from the global features, resized to the local size.
+        global_act = self.global_act(x_global)
+        sig_act = F.interpolate(
+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
+
+        # Additive term computed from the global features, resized the same way.
+        global_feat = self.global_embedding(x_global)
+        global_feat = F.interpolate(
+            global_feat, xl_hw, mode='bilinear', align_corners=False)
+
+        out = local_feat * sig_act + global_feat
+        return out
+
+
+class InjectionMultiSumCBR(nn.Layer):
+    """
+    Fuses a local feature map with a global one as
+    `local * hsigmoid(gate(global)) + embed(global)`, where the two terms derived
+    from the global map are resized to the spatial size of the local map.
+
+    `local_embedding` and `global_embedding` are ConvBNAct layers built with the
+    default normalization and no activation argument; `global_act` is a plain
+    convolution (`norm=None`, `act=None`).
+
+    Args:
+        in_channels (int): Number of input channels of all three 1x1 ConvBNAct
+            layers (they are applied to both `x_low` and `x_global`).
+        out_channels (int): Number of output channels.
+        activations (callable, optional): Accepted but not used by this layer.
+            Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed to the
+            ConvBNAct layers. Default: 1.0.
+    """
+    def __init__(self, in_channels, out_channels, activations=None,
+                 lr_mult=1.0):
+        super(InjectionMultiSumCBR, self).__init__()
+
+        self.local_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_act = ConvBNAct(
+            in_channels,
+            out_channels,
+            kernel_size=1,
+            norm=None,
+            act=None,
+            lr_mult=lr_mult)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_global):
+        # Target spatial size: axes 2 and onward of the local feature map.
+        xl_hw = paddle.shape(x_low)[2:]
+        local_feat = self.local_embedding(x_low)
+        # Gate computed from the global features, resized to the local size.
+        global_act = self.global_act(x_global)
+        global_act = F.interpolate(
+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
+        # Additive term computed from the global features, resized the same way.
+        global_feat = self.global_embedding(x_global)
+        global_feat = F.interpolate(
+            global_feat, xl_hw, mode='bilinear', align_corners=False)
+        out = local_feat * global_act + global_feat
+        return out
+
+
+class FuseBlockSum(nn.Layer):
+    """
+    Fuses two feature maps by addition: `fuse1(x_low) + resize(fuse2(x_high))`,
+    where the second term is resized to the spatial size of `x_low`.
+
+    Args:
+        in_channels (int): Number of input channels of both 1x1 ConvBNAct layers.
+        out_channels (int): Number of output channels.
+        activations (callable, optional): Accepted but not used by this layer.
+            Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed to the
+            ConvBNAct layers. Default: 1.0.
+    """
+    def __init__(self, in_channels, out_channels, activations=None,
+                 lr_mult=1.0):
+        super(FuseBlockSum, self).__init__()
+
+        self.fuse1 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
+        self.fuse2 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
+
+    def forward(self, x_low, x_high):
+        # Target spatial size: axes 2 and onward of the low-level feature map.
+        xl_hw = paddle.shape(x_low)[2:]
+        inp = self.fuse1(x_low)
+        kernel = self.fuse2(x_high)
+        feat_h = F.interpolate(
+            kernel, xl_hw, mode='bilinear', align_corners=False)
+        out = inp + feat_h
+        return out
+
+
+class FuseBlockMulti(nn.Layer):
+    """
+    Fuses two feature maps by gating: `fuse1(x_low) * resize(hsigmoid(fuse2(x_high)))`,
+    where the gate is resized to the spatial size of `x_low`.
+
+    Args:
+        in_channels (int): Number of input channels of both 1x1 ConvBNAct layers.
+        out_channels (int): Number of output channels.
+        stride (int, optional): Must be 1 or 2; it is only validated, not used
+            otherwise. Default: 1.
+        activations (callable, optional): Accepted but not used by this layer.
+            Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed to the
+            ConvBNAct layers. Default: 1.0.
+    """
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            stride=1,
+            activations=None,
+            lr_mult=1.0, ):
+        super(FuseBlockMulti, self).__init__()
+        assert stride in [1, 2], "The stride should be 1 or 2."
+
+        self.fuse1 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
+        self.fuse2 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None, lr_mult=lr_mult)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_high):
+        # Target spatial size: axes 2 and onward of the low-level feature map.
+        xl_hw = paddle.shape(x_low)[2:]
+        inp = self.fuse1(x_low)
+        sig_act = self.fuse2(x_high)
+        sig_act = F.interpolate(
+            self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
+        out = inp * sig_act
+        return out
+
+
+# Maps an `injection_type` name to the fusion layer class that TopTransformer
+# instantiates for it.
+SIM_BLOCK = {
+    "fuse_sum": FuseBlockSum,
+    "fuse_multi": FuseBlockMulti,
+    "multi_sum": InjectionMultiSum,
+    "multi_sum_cbr": InjectionMultiSumCBR,
+}
+
+
+class TopTransformer(nn.Layer):
+    """
+    TopTransformer backbone. A TokenPyramidModule produces multi-scale local
+    features, PyramidPoolAgg pools and concatenates them, a BasicLayer of
+    transformer blocks processes the result, and (optionally) injection modules
+    fuse the transformer output back into the selected local features.
+
+    Args:
+        cfgs (list): One row `[k, t, c, s]` per token pyramid layer: kernel size,
+            expand ratio, output channels and stride.
+        injection_out_channels (list): Output channels of the injection module for
+            each collected encoder output. Entries whose index is not in
+            `trans_out_indices` are not read.
+        encoder_out_indices (list[int]): Indices into `cfgs` of the layers whose
+            outputs are collected.
+        trans_out_indices (list[int], optional): Indices into the collected outputs
+            that get an injection module and are returned. Default: [1, 2, 3].
+        depths (int, optional): Number of transformer blocks. Default: 4.
+        key_dim (int, optional): Number of query/key channels per head. Default: 16.
+        num_heads (int, optional): Number of attention heads. Default: 8.
+        attn_ratios (float, optional): Passed to the blocks as `attn_ratio`.
+            Default: 2.
+        mlp_ratios (float, optional): Passed to the blocks as `mlp_ratios`.
+            Default: 2.
+        c2t_stride (int, optional): Stride passed to PyramidPoolAgg. Default: 2.
+        drop_path_rate (float, optional): The drop path rates of the blocks are
+            linearly spaced from 0 to this value. Default: 0.
+        act_layer (callable, optional): Activation layer class. Default: nn.ReLU6.
+        injection_type (str, optional): Key of `SIM_BLOCK` selecting the injection
+            module class. Default: "multi_sum".
+        injection (bool, optional): Whether to build and apply the injection
+            modules. Default: True.
+        lr_mult (float, optional): Learning rate multiplier passed to the
+            sublayers. Default: 1.0.
+        in_channels (int, optional): Number of channels of the input. Default: 3.
+        pretrained (str, optional): When not None, passed to
+            `utils.load_entire_model` to load weights. Default: None.
+    """
+    def __init__(self,
+                 cfgs,
+                 injection_out_channels,
+                 encoder_out_indices,
+                 trans_out_indices=[1, 2, 3],
+                 depths=4,
+                 key_dim=16,
+                 num_heads=8,
+                 attn_ratios=2,
+                 mlp_ratios=2,
+                 c2t_stride=2,
+                 drop_path_rate=0.,
+                 act_layer=nn.ReLU6,
+                 injection_type="multi_sum",
+                 injection=True,
+                 lr_mult=1.0,
+                 in_channels=3,
+                 pretrained=None):
+        super().__init__()
+        # Channel counts (column `c` of cfgs) of the collected encoder outputs.
+        self.feat_channels = [
+            c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
+        ]
+        self.injection_out_channels = injection_out_channels
+        self.injection = injection
+        # The transformer works on the channel-wise concatenation of all
+        # collected outputs.
+        self.embed_dim = sum(self.feat_channels)
+        self.trans_out_indices = trans_out_indices
+
+        self.tpm = TokenPyramidModule(
+            cfgs=cfgs,
+            out_indices=encoder_out_indices,
+            in_channels=in_channels,
+            lr_mult=lr_mult)
+        self.ppa = PyramidPoolAgg(stride=c2t_stride)
+
+        # One drop path rate per block, linearly spaced from 0 to drop_path_rate.
+        dpr = [x.item() for x in \
+               paddle.linspace(0, drop_path_rate, depths)]
+        self.trans = BasicLayer(
+            block_num=depths,
+            embedding_dim=self.embed_dim,
+            key_dim=key_dim,
+            num_heads=num_heads,
+            mlp_ratios=mlp_ratios,
+            attn_ratio=attn_ratios,
+            drop=0,
+            attn_drop=0,
+            drop_path=dpr,
+            act_layer=act_layer,
+            lr_mult=lr_mult)
+
+        self.SIM = nn.LayerList()
+        inj_module = SIM_BLOCK[injection_type]
+        if self.injection:
+            # Identity placeholders keep the positions in self.SIM aligned with
+            # the positions in self.feat_channels.
+            for i in range(len(self.feat_channels)):
+                if i in trans_out_indices:
+                    self.SIM.append(
+                        inj_module(
+                            self.feat_channels[i],
+                            injection_out_channels[i],
+                            activations=act_layer,
+                            lr_mult=lr_mult))
+                else:
+                    self.SIM.append(Identity())
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        """Load the pretrained weights when `pretrained` was given."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, x):
+        """
+        Returns a list of feature maps: the injected features for the indices in
+        `trans_out_indices` when injection is enabled, otherwise the collected
+        encoder outputs followed by the transformer output.
+        """
+        outputs = self.tpm(x)
+        out = self.ppa(outputs)
+        out = self.trans(out)
+
+        if self.injection:
+            # Split the transformer output back into one chunk per encoder output.
+            xx = out.split(self.feat_channels, axis=1)
+            results = []
+            for i in range(len(self.feat_channels)):
+                if i in self.trans_out_indices:
+                    local_tokens = outputs[i]
+                    global_semantics = xx[i]
+                    out_ = self.SIM[i](local_tokens, global_semantics)
+                    results.append(out_)
+            return results
+        else:
+            outputs.append(out)
+            return outputs
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Base(**kwargs):
+    """
+    Build the Base variant of TopTransformer. Extra keyword arguments are
+    forwarded to the TopTransformer constructor.
+    """
+    # Each row: [kernel size, expand ratio, output channels, stride].
+    stage_cfgs = [
+        [3, 1, 16, 1],  # 1/2
+        [3, 4, 32, 2],  # 1/4
+        [3, 3, 32, 1],
+        [5, 3, 64, 2],  # 1/8
+        [5, 3, 64, 1],
+        [3, 3, 128, 2],  # 1/16
+        [3, 3, 128, 1],
+        [5, 6, 160, 2],  # 1/32
+        [5, 6, 160, 1],
+        [3, 6, 160, 1],
+    ]
+
+    return TopTransformer(
+        cfgs=stage_cfgs,
+        injection_out_channels=[None, 256, 256, 256],
+        encoder_out_indices=[2, 4, 6, 9],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=8,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Small(**kwargs):
+    """
+    Build the Small variant of TopTransformer. Extra keyword arguments are
+    forwarded to the TopTransformer constructor.
+    """
+    # Each row: [kernel size, expand ratio, output channels, stride].
+    stage_cfgs = [
+        [3, 1, 16, 1],  # 1/2
+        [3, 4, 24, 2],  # 1/4
+        [3, 3, 24, 1],
+        [5, 3, 48, 2],  # 1/8
+        [5, 3, 48, 1],
+        [3, 3, 96, 2],  # 1/16
+        [3, 3, 96, 1],
+        [5, 6, 128, 2],  # 1/32
+        [5, 6, 128, 1],
+        [3, 6, 128, 1],
+    ]
+
+    return TopTransformer(
+        cfgs=stage_cfgs,
+        injection_out_channels=[None, 192, 192, 192],
+        encoder_out_indices=[2, 4, 6, 9],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=6,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Tiny(**kwargs):
+    """
+    Build the Tiny variant of TopTransformer. Extra keyword arguments are
+    forwarded to the TopTransformer constructor.
+    """
+    # Each row: [kernel size, expand ratio, output channels, stride].
+    stage_cfgs = [
+        [3, 1, 16, 1],  # 1/2
+        [3, 4, 16, 2],  # 1/4
+        [3, 3, 16, 1],
+        [5, 3, 32, 2],  # 1/8
+        [5, 3, 32, 1],
+        [3, 3, 64, 2],  # 1/16
+        [3, 3, 64, 1],
+        [5, 6, 96, 2],  # 1/32
+        [5, 6, 96, 1],
+    ]
+
+    return TopTransformer(
+        cfgs=stage_cfgs,
+        injection_out_channels=[None, 128, 128, 128],
+        encoder_out_indices=[2, 4, 6, 8],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=4,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)

+ 2 - 2
paddlers/models/ppseg/models/backbones/transformer_utils.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
         return x
         return x
     keep_prob = paddle.to_tensor(1 - drop_prob)
     keep_prob = paddle.to_tensor(1 - drop_prob)
     shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
     shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
-    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
+    random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
     random_tensor = paddle.floor(random_tensor)  # binarize
     random_tensor = paddle.floor(random_tensor)  # binarize
     output = x.divide(keep_prob) * random_tensor
     output = x.divide(keep_prob) * random_tensor
     return output
     return output

+ 3 - 3
paddlers/models/ppseg/models/backbones/vision_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  img_size=224,
                  img_size=224,
                  patch_size=16,
                  patch_size=16,
-                 in_chans=3,
+                 in_channels=3,
                  embed_dim=768,
                  embed_dim=768,
                  depth=12,
                  depth=12,
                  num_heads=12,
                  num_heads=12,
@@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
         self.patch_embed = PatchEmbed(
         self.patch_embed = PatchEmbed(
             img_size=img_size,
             img_size=img_size,
             patch_size=patch_size,
             patch_size=patch_size,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dim)
             embed_dim=embed_dim)
         self.pos_w = self.patch_embed.num_patches_in_w
         self.pos_w = self.patch_embed.num_patches_in_w
         self.pos_h = self.patch_embed.num_patches_in_h
         self.pos_h = self.patch_embed.num_patches_in_h

+ 8 - 3
paddlers/models/ppseg/models/backbones/xception_deeplab.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
 
 
      Args:
      Args:
          backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
          backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
+         in_channels (int, optional): The channels of input image. Default: 3.
          pretrained (str, optional): The path of pretrained model.
          pretrained (str, optional): The path of pretrained model.
          output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
          output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
 
 
     """
     """
 
 
-    def __init__(self, backbone, pretrained=None, output_stride=16):
+    def __init__(self,
+                 backbone,
+                 in_channels=3,
+                 pretrained=None,
+                 output_stride=16):
 
 
         super(XceptionDeeplab, self).__init__()
         super(XceptionDeeplab, self).__init__()
 
 
@@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
         self.feat_channels = [128, 2048]
         self.feat_channels = [128, 2048]
 
 
         self._conv1 = ConvBNLayer(
         self._conv1 = ConvBNLayer(
-            3,
+            in_channels,
             32,
             32,
             3,
             3,
             stride=2,
             stride=2,

+ 10 - 8
paddlers/models/ppseg/models/bisenet.py

@@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
         lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
         lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
     """
     """
 
 
@@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
                  num_classes,
                  num_classes,
                  lambd=0.25,
                  lambd=0.25,
                  align_corners=False,
                  align_corners=False,
+                 in_channels=3,
                  pretrained=None):
                  pretrained=None):
         super().__init__()
         super().__init__()
 
 
@@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
         sb_channels = (C1, C3, C4, C5)
         sb_channels = (C1, C3, C4, C5)
         mid_channels = 128
         mid_channels = 128
 
 
-        self.db = DetailBranch(db_channels)
-        self.sb = SemanticBranch(sb_channels)
+        self.db = DetailBranch(in_channels, db_channels)
+        self.sb = SemanticBranch(in_channels, sb_channels)
 
 
         self.bga = BGA(mid_channels, align_corners)
         self.bga = BGA(mid_channels, align_corners)
         self.aux_head1 = SegHead(C1, C1, num_classes)
         self.aux_head1 = SegHead(C1, C1, num_classes)
@@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
 class DetailBranch(nn.Layer):
 class DetailBranch(nn.Layer):
     """The detail branch of BiSeNet, which has wide channels but shallow layers."""
     """The detail branch of BiSeNet, which has wide channels but shallow layers."""
 
 
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, feature_channels):
         super().__init__()
         super().__init__()
 
 
-        C1, C2, C3 = in_channels
+        C1, C2, C3 = feature_channels
 
 
         self.convs = nn.Sequential(
         self.convs = nn.Sequential(
             # stage 1
             # stage 1
             layers.ConvBNReLU(
             layers.ConvBNReLU(
-                3, C1, 3, stride=2),
+                in_channels, C1, 3, stride=2),
             layers.ConvBNReLU(C1, C1, 3),
             layers.ConvBNReLU(C1, C1, 3),
             # stage 2
             # stage 2
             layers.ConvBNReLU(
             layers.ConvBNReLU(
@@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
 class SemanticBranch(nn.Layer):
 class SemanticBranch(nn.Layer):
     """The semantic branch of BiSeNet, which has narrow channels but deep layers."""
     """The semantic branch of BiSeNet, which has narrow channels but deep layers."""
 
 
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, feature_channels):
         super().__init__()
         super().__init__()
-        C1, C3, C4, C5 = in_channels
+        C1, C3, C4, C5 = feature_channels
 
 
-        self.stem = StemBlock(3, C1)
+        self.stem = StemBlock(in_channels, C1)
 
 
         self.stage3 = nn.Sequential(
         self.stage3 = nn.Sequential(
             GatherAndExpansionLayer2(C1, C3, 6),
             GatherAndExpansionLayer2(C1, C3, 6),

+ 174 - 0
paddlers/models/ppseg/models/ccnet.py

@@ -0,0 +1,174 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.utils import utils
+
+
+@manager.MODELS.add_component
+class CCNet(nn.Layer):
+    """
+    CCNet segmentation model built on PaddlePaddle.
+
+    Reference:
+    Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
+    (https://arxiv.org/abs/1811.11721)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
+        backbone_indices (tuple, list, optional): Two indices into the backbone outputs; the first feeds the
+            auxiliary head, the last feeds the main head. Default: (2, 3).
+        enable_auxiliary_loss (bool, optional): Whether to build and use the auxiliary head. Default: True.
+        dropout_prob (float, optional): The probability of dropout. Default: 0.0.
+        recurrence (int, optional): The number of recurrent criss-cross attention passes. Default: 1.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=(2, 3),
+                 enable_auxiliary_loss=True,
+                 dropout_prob=0.0,
+                 recurrence=1,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+        # Plain configuration attributes.
+        self.pretrained = pretrained
+        self.align_corners = align_corners
+        self.recurrence = recurrence
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        self.backbone = backbone
+        self.backbone_indices = backbone_indices
+        feat_channels = backbone.feat_channels
+        selected_channels = [feat_channels[idx] for idx in backbone_indices]
+
+        # The auxiliary head exists only when the auxiliary loss is enabled.
+        if enable_auxiliary_loss:
+            self.aux_head = layers.AuxLayer(
+                selected_channels[0],
+                512,
+                num_classes,
+                dropout_prob=dropout_prob)
+        self.head = RCCAModule(
+            selected_channels[1],
+            512,
+            num_classes,
+            dropout_prob=dropout_prob,
+            recurrence=recurrence)
+
+    def init_weight(self):
+        """Load the whole model from `self.pretrained` when a path/url was given."""
+        if self.pretrained is None:
+            return
+        utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, x):
+        """
+        Run the backbone and head(s) and resize every logit to the input's spatial size.
+
+        Returns:
+            list: The main logit first, followed by the auxiliary logit when the
+                model is in training mode and the auxiliary loss is enabled.
+        """
+        features = self.backbone(x)
+        logits = [self.head(features[self.backbone_indices[-1]])]
+        if self.training and self.enable_auxiliary_loss:
+            logits.append(self.aux_head(features[self.backbone_indices[-2]]))
+
+        resized = []
+        for logit in logits:
+            resized.append(
+                F.interpolate(
+                    logit,
+                    paddle.shape(x)[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners))
+        return resized
+
+
+class RCCAModule(nn.Layer):
+    """
+    Head used by CCNet: conv -> repeated criss-cross attention -> conv -> classifier.
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+        out_channels (int): Second argument passed to the final `layers.AuxLayer`.
+        num_classes (int): Third argument passed to the final `layers.AuxLayer`.
+        dropout_prob (float, optional): Dropout probability passed to `layers.AuxLayer`. Default: 0.1.
+        recurrence (int, optional): How many times the attention layer is applied. Default: 1.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 num_classes,
+                 dropout_prob=0.1,
+                 recurrence=1):
+        super().__init__()
+        self.recurrence = recurrence
+        # The attention path works on a quarter of the input channels.
+        mid_channels = in_channels // 4
+        self.conva = layers.ConvBNLeakyReLU(
+            in_channels, mid_channels, 3, padding=1, bias_attr=False)
+        self.cca = CrissCrossAttention(mid_channels)
+        self.convb = layers.ConvBNLeakyReLU(
+            mid_channels, mid_channels, 3, padding=1, bias_attr=False)
+        # The classifier sees the input concatenated with the attended features.
+        self.out = layers.AuxLayer(
+            in_channels + mid_channels,
+            out_channels,
+            num_classes,
+            dropout_prob=dropout_prob)
+
+    def forward(self, x):
+        """Apply the attention path and classify the concatenation of `x` with its result."""
+        attended = self.conva(x)
+        for _ in range(self.recurrence):
+            attended = self.cca(attended)
+        attended = self.convb(attended)
+        return self.out(paddle.concat([x, attended], axis=1))
+
+
+# Criss-cross attention layer: each position attends along its own column
+# and its own row, and the result is added back to the input.
+class CrissCrossAttention(nn.Layer):
+    def __init__(self, in_channels):
+        super().__init__()
+        # Query/key use 1x1 convs that reduce channels to in_channels // 8;
+        # the value projection keeps in_channels.
+        self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
+        self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
+        self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
+        # Softmax is taken over the last axis of the concatenated energies.
+        self.softmax = nn.Softmax(axis=3)
+        # Learnable scale initialised to 0, so forward() initially returns x.
+        self.gamma = self.create_parameter(
+            shape=(1, ), default_initializer=nn.initializer.Constant(0))
+        # One-element tensor holding +inf; Inf() expands it into a mask.
+        self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))
+
+    # Computes gamma * (column attention + row attention) + x for a 4-D input.
+    # NOTE(review): the b, c, h, w unpacking suggests NCHW layout - confirm.
+    def forward(self, x):
+        b, c, h, w = paddle.shape(x)
+        # Queries regrouped per column ([b * w, h, C']) and per row ([b * h, w, C']).
+        proj_q = self.q_conv(x)
+        proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
+            [b * w, -1, h]).transpose([0, 2, 1])
+        proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
+            [b * h, -1, w]).transpose([0, 2, 1])
+
+        # Keys regrouped per column ([b * w, C', h]) and per row ([b * h, C', w]).
+        proj_k = self.k_conv(x)
+        proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
+        proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
+
+        # Values regrouped the same way as the keys.
+        proj_v = self.v_conv(x)
+        proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
+        proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
+
+        # Column energies get -inf added on the diagonal before being laid out
+        # as [b, h, w, h]; presumably this keeps a position from being counted
+        # in both the column and the row term - confirm against the paper.
+        energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
+            [b, w, h, h]).transpose([0, 2, 1, 3])
+        energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
+        # Joint softmax over the h column entries and the w row entries.
+        concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))
+
+        # Split the joint attention back into its column part (first h entries)
+        # and row part (next w entries), regrouped for batched matmul.
+        attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
+            [b * w, h, h])
+        attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
+        # Aggregate values along columns and rows, then restore the input layout.
+        out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
+            [b, w, -1, h]).transpose([0, 2, 3, 1])
+        out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
+            [b, h, -1, w]).transpose([0, 2, 1, 3])
+        return self.gamma * (out_h + out_w) + x
+
+    # Returns the negated [B * W, H, H] stack of H x H diagonal matrices built
+    # from inf_tensor, i.e. -inf on each main diagonal.
+    # NOTE(review): off-diagonal entries rely on paddle.diag's default padding
+    # value (presumably 0) - confirm.
+    def Inf(self, B, H, W):
+        return -paddle.tile(
+            paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
+            [B * W, 1, 1])

+ 403 - 0
paddlers/models/ppseg/models/ddrnet.py

@@ -0,0 +1,403 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.cvlibs import manager, param_init
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.utils import utils
+
+
+class DualResNet(nn.Layer):
+    """
+    The DDRNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
+    (https://arxiv.org/abs/2101.06085)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        in_channels (int, optional): Number of input channels. Default: 3.
+        block_layers (list, tuple): The numbers of layers in different blocks. Default: (2, 2, 2, 2).
+        planes (int): Base channels in network. Default: 64.
+        spp_planes (int): Branch channels for DAPPM. Default: 128.
+        head_planes (int): Mid channels of segmentation head. Default: 128.
+        enable_auxiliary_loss (bool): Whether use auxiliary head for stage3. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 in_channels=3,
+                 block_layers=(2, 2, 2, 2),
+                 planes=64,
+                 spp_planes=128,
+                 head_planes=128,
+                 enable_auxiliary_loss=False,
+                 pretrained=None):
+        super().__init__()
+        # Channel width of the high-resolution branch.
+        highres_planes = planes * 2
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        # Stem: two stride-2 convolutions, i.e. 1/4 of the input resolution.
+        self.conv1 = nn.Sequential(
+            layers.ConvBNReLU(
+                in_channels, planes, kernel_size=3, stride=2, padding=1),
+            layers.ConvBNReLU(
+                planes, planes, kernel_size=3, stride=2, padding=1), )
+        self.relu = nn.ReLU()
+        # Low-resolution branch; layer2..layer4 each halve the resolution.
+        self.layer1 = self._make_layers(BasicBlock, planes, planes,
+                                        block_layers[0])
+        self.layer2 = self._make_layers(
+            BasicBlock, planes, planes * 2, block_layers[1], stride=2)
+        self.layer3 = self._make_layers(
+            BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
+        self.layer4 = self._make_layers(
+            BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)
+
+        # 1x1 projections from the low-resolution branch to the high-resolution width.
+        self.compression3 = layers.ConvBN(
+            planes * 4, highres_planes, kernel_size=1, bias_attr=False)
+
+        self.compression4 = layers.ConvBN(
+            planes * 8, highres_planes, kernel_size=1, bias_attr=False)
+
+        # Strided projections from the high-resolution branch down to the
+        # low-resolution branch (one stride-2 step for down3, two for down4).
+        self.down3 = layers.ConvBN(
+            highres_planes,
+            planes * 4,
+            kernel_size=3,
+            stride=2,
+            bias_attr=False)
+
+        self.down4 = nn.Sequential(
+            layers.ConvBNReLU(
+                highres_planes,
+                planes * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                bias_attr=False),
+            layers.ConvBN(
+                planes * 4,
+                planes * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                bias_attr=False))
+
+        # High-resolution branch (the trailing underscore marks its stages).
+        self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
+                                         2)
+        self.layer4_ = self._make_layers(BasicBlock, highres_planes,
+                                         highres_planes, 2)
+        # Bottleneck has expansion 2, so layer5_ outputs highres_planes * 2
+        # (= planes * 4) channels and layer5 outputs planes * 16 channels.
+        self.layer5_ = self._make_layers(Bottleneck, highres_planes,
+                                         highres_planes, 1)
+        self.layer5 = self._make_layers(
+            Bottleneck, planes * 8, planes * 8, 1, stride=2)
+
+        self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
+        if self.enable_auxiliary_loss:
+            self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
+        self.head = DDRNetHead(planes * 4, head_planes, num_classes)
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        """
+        Initialise the parameters.
+
+        Loads the whole model from `self.pretrained` when it is set; otherwise
+        applies Kaiming-normal init to Conv2D weights and sets BatchNorm2D
+        weight/bias to 1/0.
+        """
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+        else:
+            for m in self.sublayers():
+                if isinstance(m, nn.Conv2D):
+                    param_init.kaiming_normal_init(m.weight)
+                elif isinstance(m, nn.BatchNorm2D):
+                    param_init.constant_init(m.weight, value=1)
+                    param_init.constant_init(m.bias, value=0)
+
+    def _make_layers(self, block, inplanes, planes, blocks, stride=1):
+        """
+        Stack `blocks` instances of `block` into one nn.Sequential.
+
+        Args:
+            block (type): Block class exposing an `expansion` attribute (BasicBlock or Bottleneck).
+            inplanes (int): Input channels of the first block.
+            planes (int): Base channels of each block; the output has `planes * block.expansion` channels.
+            blocks (int): Number of blocks to stack.
+            stride (int, optional): Stride of the first block. Default: 1.
+
+        Returns:
+            nn.Sequential: The stacked blocks.
+        """
+        # A projection shortcut is needed whenever the first block changes the
+        # resolution or the channel count.
+        downsample = None
+        if stride != 1 or inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2D(
+                    inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias_attr=False),
+                nn.BatchNorm2D(planes * block.expansion), )
+        # Named block_list so the imported `layers` module is not shadowed.
+        block_list = []
+        block_list.append(block(inplanes, planes, stride, downsample))
+        inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            # The last block skips its final ReLU; forward() applies self.relu
+            # to the stage output before it is consumed.
+            if i == (blocks - 1):
+                block_list.append(
+                    block(
+                        inplanes, planes, stride=1, no_relu=True))
+            else:
+                block_list.append(
+                    block(
+                        inplanes, planes, stride=1, no_relu=False))
+        return nn.Sequential(*block_list)
+
+    def forward(self, x):
+        """
+        Run both branches with their bilateral fusions and return the logits.
+
+        Returns:
+            list: The main logit, plus the auxiliary logit when
+                `enable_auxiliary_loss` is set (in both training and eval mode),
+                each bilinearly resized to the input height and width.
+        """
+        n, c, h, w = paddle.shape(x)
+        # Spatial size of the high-resolution branch (1/8 of the input).
+        width_output = w // 8
+        height_output = h // 8
+
+        x = self.conv1(x)
+        stage1_out = self.layer1(x)
+        stage2_out = self.layer2(self.relu(stage1_out))
+        stage3_out = self.layer3(self.relu(stage2_out))
+        stage3_out_dual = self.layer3_(self.relu(stage2_out))
+        # Fusion after stage 3: high -> low via down3, low -> high via
+        # compression3 followed by upsampling.
+        x = stage3_out + self.down3(self.relu(stage3_out_dual))
+        stage3_merge = stage3_out_dual + F.interpolate(
+            self.compression3(self.relu(stage3_out)),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        stage4_out = self.layer4(self.relu(x))
+        stage4_out_dual = self.layer4_(self.relu(stage3_merge))
+
+        # Fusion after stage 4, same scheme with down4 / compression4.
+        x = stage4_out + self.down4(self.relu(stage4_out_dual))
+        stage4_merge = stage4_out_dual + F.interpolate(
+            self.compression4(self.relu(stage4_out)),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        stage5_out_dual = self.layer5_(self.relu(stage4_merge))
+        # The low-resolution branch goes through layer5 and DAPPM, then is
+        # upsampled to the high-resolution size before the final sum.
+        x = F.interpolate(
+            self.spp(self.layer5(self.relu(x))),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        output = self.head(x + stage5_out_dual)
+        logit_list = []
+        logit_list.append(output)
+
+        if self.enable_auxiliary_loss:
+            aux_out = self.aux_head(stage3_merge)
+            logit_list.append(aux_out)
+        return [
+            F.interpolate(
+                logit, [h, w], mode='bilinear') for logit in logit_list
+        ]
+
+
+class BasicBlock(nn.Layer):
+    """
+    Residual block made of two 3x3 convolutions.
+
+    Args:
+        inplanes (int): Input channels.
+        planes (int): Output channels.
+        stride (int, optional): Stride of the first convolution. Default: 1.
+        downsample (nn.Layer, optional): Layer applied to the input to form the shortcut;
+            the raw input is used when it is None. Default: None.
+        no_relu (bool, optional): When True the final ReLU is skipped. Default: False.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 no_relu=False):
+        super().__init__()
+        self.conv_bn_relu = layers.ConvBNReLU(
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias_attr=False)
+        self.relu = nn.ReLU()
+        self.conv_bn = layers.ConvBN(
+            planes, planes, kernel_size=3, stride=1, padding=1, bias_attr=False)
+        self.downsample = downsample
+        self.stride = stride
+        self.no_relu = no_relu
+
+    def forward(self, x):
+        """Return conv path + shortcut, followed by ReLU unless `no_relu` is set."""
+        out = self.conv_bn(self.conv_bn_relu(x))
+        # The shortcut is the input itself unless a projection layer was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return out if self.no_relu else self.relu(out)
+
+
+class Bottleneck(nn.Layer):
+    """
+    Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv.
+
+    The output has `planes * expansion` channels.
+
+    Args:
+        inplanes (int): Input channels.
+        planes (int): Channels of the two inner convolutions.
+        stride (int, optional): Stride of the 3x3 convolution. Default: 1.
+        downsample (nn.Layer, optional): Layer applied to the input to form the shortcut;
+            the raw input is used when it is None. Default: None.
+        no_relu (bool, optional): When True the final ReLU is skipped. Default: True.
+    """
+
+    expansion = 2
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 no_relu=True):
+        super().__init__()
+        self.conv_bn_relu1 = layers.ConvBNReLU(
+            inplanes, planes, kernel_size=1, bias_attr=False)
+        self.conv_bn_relu2 = layers.ConvBNReLU(
+            planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias_attr=False)
+        self.conv_bn = layers.ConvBN(
+            planes, planes * self.expansion, kernel_size=1, bias_attr=False)
+        self.relu = nn.ReLU()
+        self.downsample = downsample
+        self.stride = stride
+        self.no_relu = no_relu
+
+    def forward(self, x):
+        """Return conv path + shortcut, followed by ReLU unless `no_relu` is set."""
+        out = self.conv_bn(self.conv_bn_relu2(self.conv_bn_relu1(x)))
+        # The shortcut is the input itself unless a projection layer was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return out if self.no_relu else self.relu(out)
+
+
+class DAPPM(nn.Layer):
+    """
+    Multi-scale pooling module with five branches fused hierarchically.
+
+    Args:
+        inplanes (int): Input channels.
+        branch_planes (int): Channels of every pooling branch.
+        outplanes (int): Output channels.
+    """
+
+    def __init__(self, inplanes, branch_planes, outplanes):
+        super().__init__()
+
+        def bn_relu_conv(in_ch, out_ch, pool=None, **conv_kwargs):
+            # [optional pooling] -> SyncBatchNorm -> ReLU -> bias-free Conv2D,
+            # created in exactly this order.
+            stack = [] if pool is None else [pool]
+            stack.append(layers.SyncBatchNorm(in_ch))
+            stack.append(nn.ReLU())
+            stack.append(
+                nn.Conv2D(
+                    in_ch, out_ch, bias_attr=False, **conv_kwargs))
+            return nn.Sequential(*stack)
+
+        # Pooled branches at growing receptive fields, then the global branch
+        # (scale4) and the un-pooled branch (scale0).
+        self.scale1 = bn_relu_conv(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=5, stride=2, padding=2),
+            kernel_size=1)
+        self.scale2 = bn_relu_conv(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=9, stride=4, padding=4),
+            kernel_size=1)
+        self.scale3 = bn_relu_conv(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=17, stride=8, padding=8),
+            kernel_size=1)
+        self.scale4 = bn_relu_conv(
+            inplanes,
+            branch_planes,
+            pool=nn.AdaptiveAvgPool2D((1, 1)),
+            kernel_size=1)
+        self.scale0 = bn_relu_conv(inplanes, branch_planes, kernel_size=1)
+
+        # 3x3 refinement applied after each branch is merged with the previous one.
+        self.process1 = bn_relu_conv(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process2 = bn_relu_conv(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process3 = bn_relu_conv(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process4 = bn_relu_conv(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+
+        # Fuse the five branch outputs, plus a 1x1 shortcut from the input.
+        self.compression = bn_relu_conv(
+            branch_planes * 5, outplanes, kernel_size=1)
+        self.shortcut = bn_relu_conv(inplanes, outplanes, kernel_size=1)
+
+    def forward(self, x):
+        """Fuse the branches from fine to coarse and add the shortcut projection of `x`."""
+        n, c, h, w = paddle.shape(x)
+        branch_outs = [self.scale0(x)]
+        stages = ((self.scale1, self.process1), (self.scale2, self.process2),
+                  (self.scale3, self.process3), (self.scale4, self.process4))
+        for scale, process in stages:
+            # Upsample the pooled branch to the input size and merge it with
+            # the previous branch output before refining.
+            upsampled = F.interpolate(scale(x), size=[h, w], mode='bilinear')
+            branch_outs.append(process(upsampled + branch_outs[-1]))
+
+        fused = self.compression(paddle.concat(branch_outs, 1))
+        return fused + self.shortcut(x)
+
+
+class DDRNetHead(nn.Layer):
+    """
+    Segmentation head: BatchNorm -> ReLU -> 3x3 ConvBNReLU -> 1x1 Conv2D.
+
+    Args:
+        inplanes (int): Input channels.
+        interplanes (int): Channels of the intermediate 3x3 convolution.
+        outplanes (int): Output channels of the final 1x1 convolution.
+        scale_factor (optional): When not None, the output is bilinearly resized by this factor. Default: None.
+    """
+
+    def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
+        super().__init__()
+        self.scale_factor = scale_factor
+
+        self.bn1 = nn.BatchNorm2D(inplanes)
+        self.relu = nn.ReLU()
+        self.conv_bn_relu = layers.ConvBNReLU(
+            inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
+        self.conv = nn.Conv2D(
+            interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
+
+    def forward(self, x):
+        """Return the head output, resized when `scale_factor` was given."""
+        feat = self.conv_bn_relu(self.relu(self.bn1(x)))
+        logit = self.conv(feat)
+        if self.scale_factor is None:
+            return logit
+        return F.interpolate(
+            logit, scale_factor=self.scale_factor, mode='bilinear')
+
+
+@manager.MODELS.add_component
+def DDRNet_23(**kwargs):
+    """
+    Build the DDRNet_23 preset of `DualResNet`.
+
+    Args:
+        **kwargs: Extra keyword arguments forwarded unchanged to `DualResNet`.
+
+    Returns:
+        DualResNet: The constructed model.
+    """
+    # Fixed settings of this variant; a duplicate key in kwargs still raises
+    # TypeError, exactly as with explicit keyword arguments.
+    preset = dict(
+        block_layers=[2, 2, 2, 2],
+        planes=64,
+        spp_planes=128,
+        head_planes=128)
+    return DualResNet(**preset, **kwargs)

+ 3 - 1
paddlers/models/ppseg/models/emanet.py

@@ -209,7 +209,9 @@ class EMAU(nn.Layer):
             mu = F.normalize(mu, axis=1, p=2)
             mu = F.normalize(mu, axis=1, p=2)
             mu = self.mu * (1 - self.momentum) + mu * self.momentum
             mu = self.mu * (1 - self.momentum) + mu * self.momentum
             if paddle.distributed.get_world_size() > 1:
             if paddle.distributed.get_world_size() > 1:
-                mu = paddle.distributed.all_reduce(mu)
+                out = paddle.distributed.all_reduce(mu)
+                if out is not None:
+                    mu = out
                 mu /= paddle.distributed.get_world_size()
                 mu /= paddle.distributed.get_world_size()
             self.mu = mu
             self.mu = mu
 
 

+ 3 - 1
paddlers/models/ppseg/models/enet.py

@@ -34,6 +34,7 @@ class ENet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
         encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
             function; otherwise, PReLU is used. Default: False.
             function; otherwise, PReLU is used. Default: False.
@@ -43,13 +44,14 @@ class ENet(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  encoder_relu=False,
                  encoder_relu=False,
                  decoder_relu=True):
                  decoder_relu=True):
         super(ENet, self).__init__()
         super(ENet, self).__init__()
 
 
         self.numclasses = num_classes
         self.numclasses = num_classes
-        self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
+        self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
 
 
         self.downsample1_0 = DownsamplingBottleneck(
         self.downsample1_0 = DownsamplingBottleneck(
             16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)
             16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)

+ 13 - 3
paddlers/models/ppseg/models/fast_scnn.py

@@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
     (https://arxiv.org/pdf/1902.04502.pdf).
     (https://arxiv.org/pdf/1902.04502.pdf).
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
         enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
             If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
             If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  enable_auxiliary_loss=True,
                  enable_auxiliary_loss=True,
                  align_corners=False,
                  align_corners=False,
                  pretrained=None):
                  pretrained=None):
 
 
         super().__init__()
         super().__init__()
 
 
-        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
+        self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
+                                                           64)
         self.global_feature_extractor = GlobalFeatureExtractor(
         self.global_feature_extractor = GlobalFeatureExtractor(
             in_channels=64,
             in_channels=64,
             block_channels=[64, 96, 128],
             block_channels=[64, 96, 128],
@@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
         out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
         out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
     """
     """
 
 
-    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
+    def __init__(self,
+                 in_channels=3,
+                 dw_channels1=32,
+                 dw_channels2=48,
+                 out_channels=64):
         super(LearningToDownsample, self).__init__()
         super(LearningToDownsample, self).__init__()
 
 
         self.conv_bn_relu = layers.ConvBNReLU(
         self.conv_bn_relu = layers.ConvBNReLU(
-            in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
+            in_channels=in_channels,
+            out_channels=dw_channels1,
+            kernel_size=3,
+            stride=2)
         self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
         self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
             in_channels=dw_channels1,
             in_channels=dw_channels1,
             out_channels=dw_channels2,
             out_channels=dw_channels2,

+ 1 - 1
paddlers/models/ppseg/models/ginet.py

@@ -92,7 +92,7 @@ class GINet(nn.Layer):
 
 
         return [
         return [
             F.interpolate(
             F.interpolate(
-                logit, (h, w),
+                logit, [h, w],
                 mode='bilinear',
                 mode='bilinear',
                 align_corners=self.align_corners) for logit in logit_list
                 align_corners=self.align_corners) for logit in logit_list
         ]
         ]

+ 198 - 0
paddlers/models/ppseg/models/glore.py

@@ -0,0 +1,198 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.utils import utils
+
+
+@manager.MODELS.add_component
+class GloRe(nn.Layer):
+    """
+    The GloRe implementation based on PaddlePaddle.
+
+    The original article refers to:
+       Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
+       (https://arxiv.org/pdf/1811.12814.pdf)
+    
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101.
+        backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
+        gru_num_node (tuple, optional): The number of nodes in GloRe Unit. Default: Default: 128.
+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=(2, 3),
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+
+        self.backbone = backbone
+        backbone_channels = [
+            backbone.feat_channels[i] for i in backbone_indices
+        ]
+
+        self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
+                              gru_channels, gru_num_state, gru_num_node,
+                              enable_auxiliary_loss)
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        feat_list = self.backbone(x)
+        logit_list = self.head(feat_list)
+        return [
+            F.interpolate(
+                logit,
+                paddle.shape(x)[2:],
+                mode='bilinear',
+                align_corners=self.align_corners) for logit in logit_list
+        ]
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class GloReHead(nn.Layer):
+    def __init__(self,
+                 num_classes,
+                 backbone_indices,
+                 backbone_channels,
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True):
+        super().__init__()
+
+        in_channels = backbone_channels[1]
+        self.conv_bn_relu = layers.ConvBNReLU(
+            in_channels, gru_channels, 1, bias_attr=False)
+        self.gru_module = GruModule(
+            num_input=gru_channels,
+            num_state=gru_num_state,
+            num_node=gru_num_node)
+
+        self.dropout = nn.Dropout(0.1)
+        self.classifier = nn.Conv2D(512, num_classes, kernel_size=1)
+        self.auxlayer = layers.AuxLayer(
+            in_channels=backbone_channels[0],
+            inter_channels=backbone_channels[0] // 4,
+            out_channels=num_classes)
+
+        self.backbone_indices = backbone_indices
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+    def forward(self, feat_list):
+
+        logit_list = []
+        x = feat_list[self.backbone_indices[1]]
+
+        feature = self.conv_bn_relu(x)
+        gru_output = self.gru_module(feature)
+        output = self.dropout(gru_output)
+        logit = self.classifier(output)
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            low_level_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.auxlayer(low_level_feat)
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+
+class GCN(nn.Layer):
+    def __init__(self, num_state, num_node, bias=False):
+        super(GCN, self).__init__()
+        self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv1D(
+            num_state, num_state, kernel_size=1, bias_attr=bias)
+
+    def forward(self, x):
+        h = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
+        h = paddle.transpose(h, perm=(0, 2, 1))
+        h = h + x
+        h = self.relu(self.conv2(h))
+        return h
+
+
+class GruModule(nn.Layer):
+    def __init__(self,
+                 num_input=512,
+                 num_state=128,
+                 num_node=64,
+                 normalize=False):
+        super(GruModule, self).__init__()
+        self.normalize = normalize
+        self.num_state = num_state
+        self.num_node = num_node
+        self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
+        self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
+        self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
+        self.extend_dim = nn.Conv2D(
+            self.num_state, num_input, kernel_size=1, bias_attr=False)
+        self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)
+
+    def forward(self, input):
+        n, c, h, w = input.shape
+        # B, C, H, W
+        reduction_dim = self.reduction_dim(input)
+        # B, N, H, W
+        mat_B = self.projection_mat(input)
+        # B, C, H*W
+        reshaped_reduction = paddle.reshape(
+            reduction_dim, shape=[n, self.num_state, h * w])
+        # B, N, H*W
+        reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w])
+        # B, N, H*W
+        reproject = reshaped_B
+        # B, C, N
+        node_state_V = paddle.matmul(
+            reshaped_reduction, paddle.transpose(
+                reshaped_B, perm=[0, 2, 1]))
+
+        if self.normalize:
+            node_state_V = node_state_V * (1. / reshaped_reduction.shape[2])
+
+        # B, C, N
+        gcn_out = self.gcn(node_state_V)
+        # B, C, H*W
+        Y = paddle.matmul(gcn_out, reproject)
+        # B, C, H, W
+        Y = paddle.reshape(Y, shape=[n, self.num_state, h, w])
+        Y_extend = self.extend_dim(Y)
+        Y_extend = self.extend_bn(Y_extend)
+
+        out = input + Y_extend
+        return out

+ 3 - 1
paddlers/models/ppseg/models/hardnet.py

@@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
         stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
         ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
         ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
         grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
         grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
@@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  stem_channels=(16, 24, 32, 48),
                  stem_channels=(16, 24, 32, 48),
                  ch_list=(64, 96, 160, 224, 320),
                  ch_list=(64, 96, 160, 224, 320),
                  grmul=1.7,
                  grmul=1.7,
@@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
 
 
         self.stem = nn.Sequential(
         self.stem = nn.Sequential(
             layers.ConvBNReLU(
             layers.ConvBNReLU(
-                3, stem_channels[0], kernel_size=3, bias_attr=False),
+                in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
             layers.ConvBNReLU(
             layers.ConvBNReLU(
                 stem_channels[0],
                 stem_channels[0],
                 stem_channels[1],
                 stem_channels[1],

+ 2 - 1
paddlers/models/ppseg/models/layers/__init__.py

@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
+from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
 from .activation import Activation
 from .activation import Activation
 from .pyramid_pool import ASPPModule, PPModule
 from .pyramid_pool import ASPPModule, PPModule
 from .attention import AttentionBlock
 from .attention import AttentionBlock
 from .nonlocal2d import NonLocal2D
 from .nonlocal2d import NonLocal2D
 from .wrap_functions import *
 from .wrap_functions import *
+from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten

+ 126 - 0
paddlers/models/ppseg/models/layers/attention.py

@@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
         if self.out_project is not None:
         if self.out_project is not None:
             context = self.out_project(context)
             context = self.out_project(context)
         return context
         return context
+
+
+class DualAttentionModule(nn.Layer):
+    """
+    Dual attention module.
+
+    Args:
+        in_channels (int): The number of input channels.
+        out_channels (int): The number of output channels.
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        inter_channels = in_channels // 4
+
+        self.channel_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
+        self.position_conv = layers.ConvBNReLU(in_channels, inter_channels, 1)
+        self.pam = PAM(inter_channels)
+        self.cam = CAM(inter_channels)
+        self.conv1 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
+        self.conv2 = layers.ConvBNReLU(inter_channels, inter_channels, 3)
+        self.conv3 = layers.ConvBNReLU(inter_channels, out_channels, 3)
+
+    def forward(self, feats):
+        channel_feats = self.channel_conv(feats)
+        channel_feats = self.cam(channel_feats)
+        channel_feats = self.conv1(channel_feats)
+
+        position_feats = self.position_conv(feats)
+        position_feats = self.pam(position_feats)
+        position_feats = self.conv2(position_feats)
+
+        feats_sum = position_feats + channel_feats
+        out = self.conv3(feats_sum)
+        return out
+
+
+class PAM(nn.Layer):
+    """
+    Position attention module.
+    Args:
+        in_channels (int): The number of input channels.
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        mid_channels = in_channels // 8
+        self.mid_channels = mid_channels
+        self.in_channels = in_channels
+
+        self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
+        self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1)
+        self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)
+
+        self.gamma = self.create_parameter(
+            shape=[1],
+            dtype='float32',
+            default_initializer=nn.initializer.Constant(0))
+
+    def forward(self, x):
+        x_shape = paddle.shape(x)
+
+        # query: n, h * w, c1
+        query = self.query_conv(x)
+        query = paddle.reshape(query, (0, self.mid_channels, -1))
+        query = paddle.transpose(query, (0, 2, 1))
+
+        # key: n, c1, h * w
+        key = self.key_conv(x)
+        key = paddle.reshape(key, (0, self.mid_channels, -1))
+
+        # sim: n, h * w, h * w
+        sim = paddle.bmm(query, key)
+        sim = F.softmax(sim, axis=-1)
+
+        value = self.value_conv(x)
+        value = paddle.reshape(value, (0, self.in_channels, -1))
+        sim = paddle.transpose(sim, (0, 2, 1))
+
+        # feat: from (n, c2, h * w) -> (n, c2, h, w)
+        feat = paddle.bmm(value, sim)
+        feat = paddle.reshape(feat,
+                              (0, self.in_channels, x_shape[2], x_shape[3]))
+
+        out = self.gamma * feat + x
+        return out
+
+
+class CAM(nn.Layer):
+    """
+    Channel attention module.
+    Args:
+        in_channels (int): The number of input channels.
+    """
+
+    def __init__(self, channels):
+        super().__init__()
+
+        self.channels = channels
+        self.gamma = self.create_parameter(
+            shape=[1],
+            dtype='float32',
+            default_initializer=nn.initializer.Constant(0))
+
+    def forward(self, x):
+        x_shape = paddle.shape(x)
+        # query: n, c, h * w
+        query = paddle.reshape(x, (0, self.channels, -1))
+        # key: n, h * w, c
+        key = paddle.reshape(x, (0, self.channels, -1))
+        key = paddle.transpose(key, (0, 2, 1))
+
+        # sim: n, c, c
+        sim = paddle.bmm(query, key)
+        # The danet author claims that this can avoid gradient divergence
+        sim = paddle.max(sim, axis=-1, keepdim=True).tile(
+            [1, 1, self.channels]) - sim
+        sim = F.softmax(sim, axis=-1)
+
+        # feat: from (n, c, h * w) to (n, c, h, w)
+        value = paddle.reshape(x, (0, self.channels, -1))
+        feat = paddle.bmm(sim, value)
+        feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3]))
+
+        out = self.gamma * feat + x
+        return out

+ 57 - 0
paddlers/models/ppseg/models/layers/layer_libs.py

@@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
         return x
         return x
 
 
 
 
+class ConvBNAct(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 padding='same',
+                 act_type=None,
+                 **kwargs):
+        super().__init__()
+
+        self._conv = nn.Conv2D(
+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
+
+        if 'data_format' in kwargs:
+            data_format = kwargs['data_format']
+        else:
+            data_format = 'NCHW'
+        self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
+
+        self._act_type = act_type
+        if act_type is not None:
+            self._act = layers.Activation(act_type)
+
+    def forward(self, x):
+        x = self._conv(x)
+        x = self._batch_norm(x)
+        if self._act_type is not None:
+            x = self._act(x)
+        return x
+
+
 class ConvBN(nn.Layer):
 class ConvBN(nn.Layer):
     def __init__(self,
     def __init__(self,
                  in_channels,
                  in_channels,
@@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
         x = self._batch_norm(x)
         x = self._batch_norm(x)
         x = self._prelu(x)
         x = self._prelu(x)
         return x
         return x
+
+
+class ConvBNLeakyReLU(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 padding='same',
+                 **kwargs):
+        super().__init__()
+
+        self._conv = nn.Conv2D(
+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
+
+        if 'data_format' in kwargs:
+            data_format = kwargs['data_format']
+        else:
+            data_format = 'NCHW'
+        self._batch_norm = SyncBatchNorm(out_channels, data_format=data_format)
+        self._relu = layers.Activation("leakyrelu")
+
+    def forward(self, x):
+        x = self._conv(x)
+        x = self._batch_norm(x)
+        x = self._relu(x)
+        return x

+ 285 - 0
paddlers/models/ppseg/models/layers/tensor_fusion.py

@@ -0,0 +1,285 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Constant
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.models.layers import tensor_fusion_helper as helper
+
+
+class UAFM(nn.Layer):
+    """
+    The base of Unified Attention Fusion Module.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__()
+
+        self.conv_x = layers.ConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
+        self.conv_out = layers.ConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+        self.resize_mode = resize_mode
+
+    def check(self, x, y):
+        assert x.ndim == 4 and y.ndim == 4
+        x_h, x_w = x.shape[2:]
+        y_h, y_w = y.shape[2:]
+        assert x_h >= y_h and x_w >= y_w
+
+    def prepare(self, x, y):
+        x = self.prepare_x(x, y)
+        y = self.prepare_y(x, y)
+        return x, y
+
+    def prepare_x(self, x, y):
+        x = self.conv_x(x)
+        return x
+
+    def prepare_y(self, x, y):
+        y_up = F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
+        return y_up
+
+    def fuse(self, x, y):
+        out = x + y
+        out = self.conv_out(out)
+        return out
+
+    def forward(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        self.check(x, y)
+        x, y = self.prepare(x, y)
+        out = self.fuse(x, y)
+        return out
+
+
+class UAFM_ChAtten(UAFM):
+    """
+    The UAFM with channel attention, which uses mean and max values.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNAct(
+                4 * y_ch,
+                y_ch // 2,
+                kernel_size=1,
+                bias_attr=False,
+                act_type="leakyrelu"),
+            layers.ConvBN(
+                y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        atten = helper.avg_max_reduce_hw([x, y], self.training)
+        atten = F.sigmoid(self.conv_xy_atten(atten))
+
+        out = x * atten + y * (1 - atten)
+        out = self.conv_out(out)
+        return out
+
+
+class UAFM_ChAtten_S(UAFM):
+    """
+    The UAFM with channel attention, which uses mean values.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNAct(
+                2 * y_ch,
+                y_ch // 2,
+                kernel_size=1,
+                bias_attr=False,
+                act_type="leakyrelu"),
+            layers.ConvBN(
+                y_ch // 2, y_ch, kernel_size=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        atten = helper.avg_reduce_hw([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(atten))
+
+        out = x * atten + y * (1 - atten)
+        out = self.conv_out(out)
+        return out
+
+
+class UAFM_SpAtten(UAFM):
+    """
+    The UAFM with spatial attention, which uses mean and max values.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                4, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+        self._scale = self.create_parameter(
+            shape=[1],
+            attr=ParamAttr(initializer=Constant(value=1.)),
+            dtype="float32")
+        self._scale.stop_gradient = True
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        atten = helper.avg_max_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(atten))
+
+        out = x * atten + y * (self._scale - atten)
+        out = self.conv_out(out)
+        return out
+
+
+class UAFM_SpAtten_S(UAFM):
+    """
+    The UAFM with spatial attention, which uses mean values.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                2, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        atten = helper.avg_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(atten))
+
+        out = x * atten + y * (1 - atten)
+        out = self.conv_out(out)
+        return out
+
+
+class UAFMMobile(UAFM):
+    """
+    Unified Attention Fusion Module for mobile.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_x = layers.SeparableConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
+        self.conv_out = layers.SeparableConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+
+
+class UAFMMobile_SpAtten(UAFM):
+    """
+    Unified Attention Fusion Module with spatial attention for mobile.
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize model in unsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        self.conv_x = layers.SeparableConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
+        self.conv_out = layers.SeparableConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                4, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        atten = helper.avg_max_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(atten))
+
+        out = x * atten + y * (1 - atten)
+        out = self.conv_out(out)
+        return out

+ 133 - 0
paddlers/models/ppseg/models/layers/tensor_fusion_helper.py

@@ -0,0 +1,133 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+def avg_reduce_hw(x):
+    # Reduce hw by avg
+    # Return cat([avg_pool_0, avg_pool_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return F.adaptive_avg_pool2d(x, 1)
+    elif len(x) == 1:
+        return F.adaptive_avg_pool2d(x[0], 1)
+    else:
+        res = []
+        for xi in x:
+            res.append(F.adaptive_avg_pool2d(xi, 1))
+        return paddle.concat(res, axis=1)
+
+
+def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
+    assert not isinstance(x, (list, tuple))
+    avg_pool = F.adaptive_avg_pool2d(x, 1)
+    # TODO(pjc): when axis=[2, 3], the paddle.max api has bug for training.
+    if is_training:
+        max_pool = F.adaptive_max_pool2d(x, 1)
+    else:
+        max_pool = paddle.max(x, axis=[2, 3], keepdim=True)
+
+    if use_concat:
+        res = paddle.concat([avg_pool, max_pool], axis=1)
+    else:
+        res = [avg_pool, max_pool]
+    return res
+
+
+def avg_max_reduce_hw(x, is_training):
+    # Reduce hw by avg and max
+    # Return cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return avg_max_reduce_hw_helper(x, is_training)
+    elif len(x) == 1:
+        return avg_max_reduce_hw_helper(x[0], is_training)
+    else:
+        res_avg = []
+        res_max = []
+        for xi in x:
+            avg, max = avg_max_reduce_hw_helper(xi, is_training, False)
+            res_avg.append(avg)
+            res_max.append(max)
+        res = res_avg + res_max
+        return paddle.concat(res, axis=1)
+
+
+def avg_reduce_channel(x):
+    # Reduce channel by avg
+    # Return cat([avg_ch_0, avg_ch_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return paddle.mean(x, axis=1, keepdim=True)
+    elif len(x) == 1:
+        return paddle.mean(x[0], axis=1, keepdim=True)
+    else:
+        res = []
+        for xi in x:
+            res.append(paddle.mean(xi, axis=1, keepdim=True))
+        return paddle.concat(res, axis=1)
+
+
+def max_reduce_channel(x):
+    # Reduce channel by max
+    # Return cat([max_ch_0, max_ch_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return paddle.max(x, axis=1, keepdim=True)
+    elif len(x) == 1:
+        return paddle.max(x[0], axis=1, keepdim=True)
+    else:
+        res = []
+        for xi in x:
+            res.append(paddle.max(xi, axis=1, keepdim=True))
+        return paddle.concat(res, axis=1)
+
+
+def avg_max_reduce_channel_helper(x, use_concat=True):
+    # Reduce hw by avg and max, only support single input
+    assert not isinstance(x, (list, tuple))
+    mean_value = paddle.mean(x, axis=1, keepdim=True)
+    max_value = paddle.max(x, axis=1, keepdim=True)
+
+    if use_concat:
+        res = paddle.concat([mean_value, max_value], axis=1)
+    else:
+        res = [mean_value, max_value]
+    return res
+
+
+def avg_max_reduce_channel(x):
+    # Reduce hw by avg and max
+    # Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
+    if not isinstance(x, (list, tuple)):
+        return avg_max_reduce_channel_helper(x)
+    elif len(x) == 1:
+        return avg_max_reduce_channel_helper(x[0])
+    else:
+        res = []
+        for xi in x:
+            res.extend(avg_max_reduce_channel_helper(xi, False))
+        return paddle.concat(res, axis=1)
+
+
+def cat_avg_max_reduce_channel(x):
+    # Reduce hw by cat+avg+max
+    assert isinstance(x, (list, tuple)) and len(x) > 1
+
+    x = paddle.concat(x, axis=1)
+
+    mean_value = paddle.mean(x, axis=1, keepdim=True)
+    max_value = paddle.max(x, axis=1, keepdim=True)
+    res = paddle.concat([mean_value, max_value], axis=1)
+
+    return res

+ 1 - 1
paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py

@@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
                     raise ValueError(
                     raise ValueError(
                         "if type of `weight` is str, it should equal to 'dynamic', but it is {}"
                         "if type of `weight` is str, it should equal to 'dynamic', but it is {}"
                         .format(self.weight))
                         .format(self.weight))
-            elif isinstance(self.weight, paddle.VarBase):
+            elif not isinstance(self.weight, paddle.Tensor):
                 raise TypeError(
                 raise TypeError(
                     'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
                     'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
                     .format(type(self.weight)))
                     .format(type(self.weight)))

+ 1 - 3
paddlers/models/ppseg/models/losses/cross_entropy_loss.py

@@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
             logit = paddle.transpose(logit, [0, 2, 3, 1])
             logit = paddle.transpose(logit, [0, 2, 3, 1])
         label = label.astype('int64')
         label = label.astype('int64')
 
 
-        # In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
-        # When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
         loss = F.cross_entropy(
         loss = F.cross_entropy(
             logit,
             logit,
             label,
             label,
@@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
             loss = loss * semantic_weights
             loss = loss * semantic_weights
 
 
         if self.weight is not None:
         if self.weight is not None:
-            _one_hot = F.one_hot(label, logit.shape[-1])
+            _one_hot = F.one_hot(label * mask, logit.shape[-1])
             coef = paddle.sum(_one_hot * self.weight, axis=-1)
             coef = paddle.sum(_one_hot * self.weight, axis=-1)
         else:
         else:
             coef = paddle.ones_like(label)
             coef = paddle.ones_like(label)

+ 1 - 1
paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py

@@ -16,7 +16,7 @@ import numpy as np
 import paddle
 import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
-from scipy.ndimage.interpolation import shift
+from scipy.ndimage import shift
 
 
 from paddlers.models.ppseg.cvlibs import manager
 from paddlers.models.ppseg.cvlibs import manager
 
 

+ 1 - 1
paddlers/models/ppseg/models/losses/detail_aggregate_loss.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 43 - 22
paddlers/models/ppseg/models/losses/dice_loss.py

@@ -19,38 +19,59 @@ from paddlers.models.ppseg.cvlibs import manager
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
 class DiceLoss(nn.Layer):
 class DiceLoss(nn.Layer):
     """
     """
-    Implements the dice loss function.
+    The implements of the dice loss.
 
 
     Args:
     Args:
-        ignore_index (int64): Specifies a target value that is ignored
-            and does not contribute to the input gradient. Default ``255``.
-        smooth (float32): laplace smoothing,
-            to smooth dice loss and accelerate convergence. following:
-            https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
+        weight (list[float], optional): The weight for each class. Default: None.
+        ignore_index (int64): ignore_index (int64, optional): Specifies a target value that
+            is ignored and does not contribute to the input gradient. Default ``255``.
+        smooth (float32): Laplace smoothing to smooth dice loss and accelerate convergence.
+            Default: 1.0
     """
     """
 
 
-    def __init__(self, ignore_index=255, smooth=0.):
-        super(DiceLoss, self).__init__()
+    def __init__(self, weight=None, ignore_index=255, smooth=1.0):
+        super().__init__()
+        self.weight = weight
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
-        self.eps = 1e-5
         self.smooth = smooth
         self.smooth = smooth
+        self.eps = 1e-8
 
 
     def forward(self, logits, labels):
     def forward(self, logits, labels):
-        labels = paddle.cast(labels, dtype='int32')
-        labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
-        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
-        labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
+        num_class = logits.shape[1]
+        if self.weight is not None:
+            assert num_class == len(self.weight), \
+                "The lenght of weight should be euqal to the num class"
+
+        mask = labels != self.ignore_index
+        mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
 
 
+        labels[labels == self.ignore_index] = 0
+        labels_one_hot = F.one_hot(labels, num_class)
+        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
         logits = F.softmax(logits, axis=1)
         logits = F.softmax(logits, axis=1)
 
 
-        mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
-        logits = logits * mask
-        labels_one_hot = labels_one_hot * mask
+        dice_loss = 0.0
+        for i in range(num_class):
+            dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
+                                           mask, self.smooth, self.eps)
+            if self.weight is not None:
+                dice_loss_i *= self.weight[i]
+            dice_loss += dice_loss_i
+        dice_loss = dice_loss / num_class
+
+        return dice_loss
 
 
-        dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
 
 
-        intersection = paddle.sum(logits * labels_one_hot, dims)
-        cardinality = paddle.sum(logits + labels_one_hot, dims)
-        dice_loss = ((2. * intersection + self.smooth) /
-                     (cardinality + self.eps + self.smooth)).mean()
-        return 1 - dice_loss
+def dice_loss_helper(logit, label, mask, smooth, eps):
+    """
+    Compute the dice loss for one class, averaged over the first axis.
+
+    Args:
+        logit (Tensor): Predicted scores for one class; must have the same
+            shape as `label`.
+        label (Tensor): Target indicator for the same class.
+        mask (Tensor): Multiplicative mask applied to both `logit` and `label`.
+        smooth (float): Added to numerator and denominator.
+        eps (float): Extra term added to the denominator only.
+
+    Returns:
+        Tensor: Mean over axis 0 of
+            1 - (2 * intersection + smooth) / (cardinality + smooth + eps).
+    """
+    assert logit.shape == label.shape, \
+        "The shape of logit and label should be the same"
+    # A 0 in the target shape keeps the corresponding input dimension, so each
+    # tensor is flattened to (dim0, -1).
+    logit = paddle.reshape(logit, [0, -1])
+    label = paddle.reshape(label, [0, -1])
+    mask = paddle.reshape(mask, [0, -1])
+    # Masked-out positions contribute nothing to either sum below.
+    logit *= mask
+    label *= mask
+    intersection = paddle.sum(logit * label, axis=1)
+    cardinality = paddle.sum(logit + label, axis=1)
+    dice_loss = 1 - (2 * intersection + smooth) / (cardinality + smooth + eps)
+    dice_loss = dice_loss.mean()
+    return dice_loss

+ 97 - 25
paddlers/models/ppseg/models/losses/focal_loss.py

@@ -23,38 +23,110 @@ from paddlers.models.ppseg.cvlibs import manager
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
 class FocalLoss(nn.Layer):
 class FocalLoss(nn.Layer):
     """
     """
-    Focal Loss.
+    The implement of focal loss.
 
 
-    Code referenced from:
-    https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
+    The focal loss requires the label is 0 or 1 for now.
 
 
     Args:
     Args:
-        gamma (float): the coefficient of Focal Loss.
-        ignore_index (int64): Specifies a target value that is ignored
+        alpha (float, list, optional): The alpha of focal loss. alpha is the weight
+            of class 1, 1-alpha is the weight of class 0. Default: 0.25
+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
+        ignore_index (int64, optional): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default ``255``.
             and does not contribute to the input gradient. Default ``255``.
     """
     """
 
 
-    def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
-        super(FocalLoss, self).__init__()
+    def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
+        super().__init__()
+        self.alpha = alpha
         self.gamma = gamma
         self.gamma = gamma
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
-        self.edge_label = edge_label
+        self.EPS = 1e-10
 
 
     def forward(self, logit, label):
     def forward(self, logit, label):
-        logit = paddle.reshape(
-            logit, [logit.shape[0], logit.shape[1], -1])  # N,C,H,W => N,C,H*W
-        logit = paddle.transpose(logit, [0, 2, 1])  # N,C,H*W => N,H*W,C
-        logit = paddle.reshape(logit,
-                               [-1, logit.shape[2]])  # N,H*W,C => N*H*W,C
-        label = paddle.reshape(label, [-1, 1])
-        range_ = paddle.arange(0, label.shape[0])
-        range_ = paddle.unsqueeze(range_, axis=-1)
-        label = paddle.cast(label, dtype='int64')
-        label = paddle.concat([range_, label], axis=-1)
-        logpt = F.log_softmax(logit)
-        logpt = paddle.gather_nd(logpt, label)
-
-        pt = paddle.exp(logpt.detach())
-        loss = -1 * (1 - pt)**self.gamma * logpt
-        loss = paddle.mean(loss)
-        return loss
+        """
+        Forward computation.
+
+        Args:
+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
+                (N, C, H, W), where C is number of classes.
+            label (Tensor): Label tensor, the data type is int64. Shape is (N, W, W),
+                where each value is 0 <= label[i] <= C-1.
+        Returns:
+            (Tensor): The average loss.
+        """
+        assert logit.ndim == 4, "The ndim of logit should be 4."
+        assert logit.shape[1] == 2, "The channel of logit should be 2."
+        assert label.ndim == 3, "The ndim of label should be 3."
+
+        class_num = logit.shape[1]  # class num is 2
+        logit = paddle.transpose(logit, [0, 2, 3, 1])  # N,C,H,W => N,H,W,C
+
+        mask = label != self.ignore_index  # N,H,W
+        mask = paddle.unsqueeze(mask, 3)
+        mask = paddle.cast(mask, 'float32')
+        mask.stop_gradient = True
+
+        label = F.one_hot(label, class_num)  # N,H,W,C
+        label = paddle.cast(label, logit.dtype)
+        label.stop_gradient = True
+
+        loss = F.sigmoid_focal_loss(
+            logit=logit,
+            label=label,
+            alpha=self.alpha,
+            gamma=self.gamma,
+            reduction='none')
+        loss = loss * mask
+        avg_loss = paddle.sum(loss) / (
+            paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
+        return avg_loss
+
+
+@manager.LOSSES.add_component
+class MultiClassFocalLoss(nn.Layer):
+    """
+    The implement of focal loss for multi class.
+
+    The loss is alpha * (1 - pt) ** gamma * ce, where ce is the per-pixel
+    cross entropy and pt = exp(-ce).
+
+    Args:
+        num_class (int): The number of classes. It is stored on the layer but
+            not read by forward.
+        alpha (float, optional): Scale factor applied to every focal term.
+            Default: 1.0
+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
+        ignore_index (int64, optional): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default ``255``.
+    """
+
+    def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
+        super().__init__()
+        self.num_class = num_class
+        self.alpha = alpha
+        self.gamma = gamma
+        self.ignore_index = ignore_index
+        # Keeps the final division defined when every label is ignored.
+        self.EPS = 1e-10
+
+    def forward(self, logit, label):
+        """
+        Forward computation.
+
+        Args:
+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
+                (N, C, H, W), where C is number of classes.
+            label (Tensor): Label tensor, cast to int64 internally. Shape is (N, H, W),
+                where each value is 0 <= label[i] <= C-1 or equals ignore_index.
+        Returns:
+            (Tensor): The average loss.
+        """
+        assert logit.ndim == 4, "The ndim of logit should be 4."
+        assert label.ndim == 3, "The ndim of label should be 3."
+
+        # N,C,H,W => N,H,W,C
+        logit = paddle.transpose(logit, [0, 2, 3, 1])
+        label = label.astype('int64')
+        ce_loss = F.cross_entropy(
+            logit, label, ignore_index=self.ignore_index, reduction='none')
+
+        # pt = exp(-ce) is the probability assigned to the target class.
+        pt = paddle.exp(-ce_loss)
+        focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss
+
+        mask = paddle.cast(label != self.ignore_index, 'float32')
+        focal_loss *= mask
+        # Dividing the mean loss by the mean of the mask rescales the average
+        # by the fraction of non-ignored labels.
+        avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
+        return avg_loss

+ 22 - 0
paddlers/models/ppseg/models/losses/l1_loss.py

@@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
 
 
     def __init__(self, reduction='mean', ignore_index=255):
     def __init__(self, reduction='mean', ignore_index=255):
         super().__init__(reduction=reduction)
         super().__init__(reduction=reduction)
+        self.ignore_index = ignore_index
+        self.EPS = 1e-10
+
+    def forward(self, input, label):
+        mask = label != self.ignore_index
+        mask = paddle.cast(mask, "float32")
+        label.stop_gradient = True
+        mask.stop_gradient = True
+
+        output = paddle.nn.functional.l1_loss(
+            input, label, "none", name=self.name) * mask
+
+        if self.reduction == "mean":
+            return paddle.mean(output) / (paddle.mean(mask) + self.EPS)
+        elif self.reduction == "none":
+            return output
+        elif self.reduction == "sum":
+            return paddle.sum(output)
+        else:
+            raise ValueError(
+                "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
+                "received %s, which is not allowed." % self.reduction)

+ 12 - 4
paddlers/models/ppseg/models/losses/lovasz_loss.py

@@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
     signs = 2. * labels - 1.
     signs = 2. * labels - 1.
     signs.stop_gradient = True
     signs.stop_gradient = True
     errors = 1. - logits * signs
     errors = 1. - logits * signs
-    errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
-                                                        'descending', True)
+    if hasattr(paddle, "_legacy_C_ops"):
+        errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
+                                                           'descending', True)
+    else:
+        errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
+                                                    'descending', True)
     errors_sorted.stop_gradient = False
     errors_sorted.stop_gradient = False
     gt_sorted = paddle.gather(labels, perm)
     gt_sorted = paddle.gather(labels, perm)
     grad = lovasz_grad(gt_sorted)
     grad = lovasz_grad(gt_sorted)
@@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
         else:
         else:
             class_pred = probas[:, c]
             class_pred = probas[:, c]
         errors = paddle.abs(fg - class_pred)
         errors = paddle.abs(fg - class_pred)
-        errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
-                                                            'descending', True)
+        if hasattr(paddle, "_legacy_C_ops"):
+            errors_sorted, perm = paddle._legacy_C_ops.argsort(
+                errors, 'axis', 0, 'descending', True)
+        else:
+            errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
+                                                        'descending', True)
         errors_sorted.stop_gradient = False
         errors_sorted.stop_gradient = False
 
 
         fg_sorted = paddle.gather(fg, perm)
         fg_sorted = paddle.gather(fg, perm)

+ 1 - 1
paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py

@@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
 
 
         # get the label after ohem
         # get the label after ohem
         n, c, h, w = logit.shape
         n, c, h, w = logit.shape
-        label = label.reshape((-1, ))
+        label = label.reshape((-1, )).astype('int64')
         valid_mask = (label != self.ignore_index).astype('int64')
         valid_mask = (label != self.ignore_index).astype('int64')
         num_valid = valid_mask.sum()
         num_valid = valid_mask.sum()
         label = label * valid_mask
         label = label * valid_mask

+ 4 - 1
paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py

@@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
                 elif num_hard >= n_view / 2:
                 elif num_hard >= n_view / 2:
                     num_easy_keep = num_easy
                     num_easy_keep = num_easy
                     num_hard_keep = n_view - num_easy_keep
                     num_hard_keep = n_view - num_easy_keep
-                else:
+                elif num_easy >= n_view / 2:
                     num_hard_keep = num_hard
                     num_hard_keep = num_hard
                     num_easy_keep = n_view - num_hard_keep
                     num_easy_keep = n_view - num_hard_keep
+                else:
+                    num_hard_keep = num_hard
+                    num_easy_keep = num_easy
 
 
                 indices = None
                 indices = None
                 if num_hard > 0:
                 if num_hard > 0:

+ 7 - 5
paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py

@@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
                 label_num_conn, label_conn = cv2.connectedComponents(
                 label_num_conn, label_conn = cv2.connectedComponents(
                     labels_np_class.astype(np.uint8))
                     labels_np_class.astype(np.uint8))
 
 
+                origin_pred_num_conn = pred_num_conn
                 if pred_num_conn > 2 * label_num_conn:
                 if pred_num_conn > 2 * label_num_conn:
                     pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
                     pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
                 real_pred_num = pred_num_conn - 1
                 real_pred_num = pred_num_conn - 1
@@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
                 # Connected Components Matching and SC Loss Calculation
                 # Connected Components Matching and SC Loss Calculation
                 if real_label_num > 0 and real_pred_num > 0:
                 if real_label_num > 0 and real_pred_num > 0:
                     img_connectivity = compute_class_connectiveity(
                     img_connectivity = compute_class_connectiveity(
-                        pred_conn, label_conn, pred_num_conn, label_num_conn,
-                        pred_i, real_label_num, real_pred_num, zero)
+                        pred_conn, label_conn, pred_num_conn,
+                        origin_pred_num_conn, label_num_conn, pred_i,
+                        real_label_num, real_pred_num, zero)
                     sc_loss += 1 - img_connectivity
                     sc_loss += 1 - img_connectivity
                 elif real_label_num == 0 and real_pred_num == 0:
                 elif real_label_num == 0 and real_pred_num == 0:
                     # if no connected component, SC Loss = 0, so pass
                     # if no connected component, SC Loss = 0, so pass
@@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
 
 
 
 
 def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
 def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
-                                label_num_conn, pred, real_label_num,
-                                real_pred_num, zero):
+                                origin_pred_num_conn, label_num_conn, pred,
+                                real_label_num, real_pred_num, zero):
 
 
     pred_conn = paddle.to_tensor(pred_conn)
     pred_conn = paddle.to_tensor(pred_conn)
     label_conn = paddle.to_tensor(label_conn)
     label_conn = paddle.to_tensor(label_conn)
-    pred_conn = F.one_hot(pred_conn, pred_num_conn)
+    pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
     label_conn = F.one_hot(label_conn, label_num_conn)
     label_conn = F.one_hot(label_conn, label_num_conn)
 
 
     ious = paddle.zeros((real_label_num, real_pred_num))
     ious = paddle.zeros((real_label_num, real_pred_num))

+ 162 - 0
paddlers/models/ppseg/models/lraspp.py

@@ -0,0 +1,162 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import partial
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg import utils
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.cvlibs import manager
+
+
+@manager.MODELS.add_component
+class LRASPP(nn.Layer):
+    """
+    Semantic segmentation model with a light R-ASPP head.
+    
+    The original article refers to
+        Howard, Andrew, et al. "Searching for mobilenetv3."
+        (https://arxiv.org/pdf/1909.11065.pdf)
+
+    Args:
+        num_classes (int): The number of target classes.
+        backbone(nn.Layer): Backbone network, such as stdc1net and resnet18. The backbone must
+            has feat_channels, of which the length is 5.
+        backbone_indices (List(int), optional): The values indicate the indices of backbone output 
+            used as the input of the LR-ASPP head.
+            Default: [0, 1, 3].
+        lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
+            Default: [32, 64].
+        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
+            Default: 128
+        resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
+            Default: bilinear.
+        use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
+            a 49x49 kernel for average pooling.
+            Default: True.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=[0, 1, 3],
+                 lraspp_head_inter_chs=[32, 64],
+                 lraspp_head_out_ch=128,
+                 resize_mode='bilinear',
+                 use_gap=True,
+                 pretrained=None):
+        super().__init__()
+
+        # backbone
+        assert hasattr(backbone, 'feat_channels'), \
+            "The backbone should has feat_channels."
+        assert len(backbone.feat_channels) >= len(backbone_indices), \
+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be" \
+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
+        assert len(backbone.feat_channels) > max(backbone_indices), \
+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
+        self.backbone = backbone
+
+        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
+            "should not be lesser than 1"
+
+        # head
+        assert len(backbone_indices) == len(
+            lraspp_head_inter_chs
+        ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs."
+        self.backbone_indices = backbone_indices
+
+        self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
+                                      lraspp_head_inter_chs, lraspp_head_out_ch,
+                                      num_classes, resize_mode, use_gap)
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        x_hw = paddle.shape(x)[2:]
+
+        feats_backbone = self.backbone(x)
+        assert len(feats_backbone) >= len(self.backbone_indices), \
+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
+
+        y = self.lraspp_head(feats_backbone)
+        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
+        logit_list = [y]
+
+        return logit_list
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class LRASPPHead(nn.Layer):
+    def __init__(self,
+                 indices,
+                 in_chs,
+                 mid_chs,
+                 out_ch,
+                 n_classes,
+                 resize_mode,
+                 use_gap,
+                 align_corners=False):
+        super().__init__()
+
+        self.indices = indices[-2::-1]
+        self.in_chs = [in_chs[i] for i in indices[::-1]]
+        self.mid_chs = mid_chs[::-1]
+        self.convs = nn.LayerList()
+        self.conv_ups = nn.LayerList()
+        for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
+            self.convs.append(
+                nn.Conv2D(
+                    in_ch, mid_ch, kernel_size=1, bias_attr=False))
+            self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))
+        self.conv_w = nn.Sequential(
+            nn.AvgPool2D(
+                kernel_size=(49, 49), stride=(16, 20))
+            if not use_gap else nn.AdaptiveAvgPool2D(1),
+            nn.Conv2D(
+                self.in_chs[0], out_ch, 1, bias_attr=False),
+            nn.Sigmoid())
+        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
+        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
+        self.conv_out = nn.Conv2D(
+            out_ch, n_classes, kernel_size=1, bias_attr=False)
+
+        self.interp = partial(
+            F.interpolate, mode=resize_mode, align_corners=align_corners)
+
+    def forward(self, in_feat_list):
+        x = in_feat_list[-1]
+
+        x = self.conv_v(x) * self.interp(self.conv_w(x), paddle.shape(x)[2:])
+        y = self.conv_t(x)
+
+        for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
+            feat = in_feat_list[idx]
+            y = self.interp(y, paddle.shape(feat)[2:])
+            y = paddle.concat([y, conv(feat)], axis=1)
+            y = conv_up(y)
+
+        y = self.conv_out(y)
+        return y

+ 1 - 1
paddlers/models/ppseg/models/mla_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 289 - 0
paddlers/models/ppseg/models/mobileseg.py

@@ -0,0 +1,289 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg import utils
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.cvlibs import manager
+
+
+@manager.MODELS.add_component
+class MobileSeg(nn.Layer):
+    """
+    The semantic segmentation models for mobile devices.
+
+    Args:
+        num_classes (int): The number of target classes.
+        backbone (nn.Layer): Backbone network. It must expose `feat_channels`
+            with more entries than the largest value in `backbone_indices`.
+        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
+            Must not be empty. Default: [1, 2, 3].
+        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2].
+        cm_out_ch (int, optional): The output channel of the last context module. Default: 64.
+        arm_type (str, optional): The name of the attention refinement module class, looked up
+            in `layers`. Default: UAFMMobile.
+        arm_out_chs (List(int), optional): The out channels of each arm module. A single-element
+            list is repeated for every entry of `backbone_indices`. Default: [32, 48, 64].
+        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
+            A single-element list is repeated for every entry of `backbone_indices`.
+            Default: [32, 32, 32].
+        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
+            Default: bilinear.
+        use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    # NOTE: the list defaults below are part of the public signature and are
+    # never mutated here (the single-element expansion builds new lists).
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=[1, 2, 3],
+                 cm_bin_sizes=[1, 2],
+                 cm_out_ch=64,
+                 arm_type='UAFMMobile',
+                 arm_out_chs=[32, 48, 64],
+                 seg_head_inter_chs=[32, 32, 32],
+                 resize_mode='bilinear',
+                 use_last_fuse=False,
+                 pretrained=None):
+        super().__init__()
+
+        # backbone
+        assert hasattr(backbone, 'feat_channels'), \
+            "The backbone should has feat_channels."
+        # Checked first: max() below raises a bare ValueError on an empty list.
+        assert len(backbone_indices) >= 1, "The length of backbone_indices " \
+            "should not be lesser than 1"
+        assert len(backbone.feat_channels) >= len(backbone_indices), \
+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
+        assert len(backbone.feat_channels) > max(backbone_indices), \
+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
+        self.backbone = backbone
+
+        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
+        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
+
+        # head
+        if len(arm_out_chs) == 1:
+            arm_out_chs = arm_out_chs * len(backbone_indices)
+        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
+            "arm_out_chs and backbone_indices should be equal"
+
+        self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
+                                        cm_bin_sizes, cm_out_ch, arm_type,
+                                        resize_mode, use_last_fuse)
+
+        if len(seg_head_inter_chs) == 1:
+            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
+        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
+            "seg_head_inter_chs and backbone_indices should be equal"
+        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
+        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
+            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): Input batch; its trailing two dimensions are used as the
+                target size of every returned logit map.
+        Returns:
+            List(Tensor): In training mode, one resized logit map per segmentation
+                head; otherwise a single-element list holding the first head's output.
+        """
+        x_hw = paddle.shape(x)[2:]
+
+        feats_backbone = self.backbone(x)  # [x4, x8, x16, x32]
+        assert len(feats_backbone) >= len(self.backbone_indices), \
+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
+
+        feats_selected = [feats_backbone[i] for i in self.backbone_indices]
+        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]
+
+        if self.training:
+            # Run every head first, then resize, without rebinding the input `x`.
+            logits = [
+                seg_head(feat)
+                for feat, seg_head in zip(feats_head, self.seg_heads)
+            ]
+            logit_list = [
+                F.interpolate(
+                    logit, x_hw, mode='bilinear', align_corners=False)
+                for logit in logits
+            ]
+        else:
+            # Only the first head is evaluated outside training.
+            logit = self.seg_heads[0](feats_head[0])
+            logit = F.interpolate(
+                logit, x_hw, mode='bilinear', align_corners=False)
+            logit_list = [logit]
+
+        return logit_list
+
+    def init_weight(self):
+        """Load the whole model from `self.pretrained` when it is set."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class MobileSegHead(nn.Layer):
+    """
+    The head of MobileSeg.
+
+    Args:
+        backbone_out_chs (List(int)): The channels of output tensors in the backbone.
+        arm_out_chs (List(int)): The out channels of each arm module.
+        cm_bin_sizes (List(int)): The bin size of context module.
+        cm_out_ch (int): The output channel of the last context module.
+        arm_type (str): The name of the attention refinement module class in `layers`.
+        resize_mode (str): The resize mode for the upsampling operation in decoder.
+        use_last_fuse (bool): Whether to replace the first output feature with a
+            fusion of all output features.
+    """
+
+    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
+                 arm_type, resize_mode, use_last_fuse):
+        super().__init__()
+
+        self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
+                                      cm_out_ch, cm_bin_sizes)
+
+        assert hasattr(layers, arm_type), \
+            "Not support arm_type ({})".format(arm_type)
+        # Plain attribute lookup instead of eval() on a config-supplied string.
+        arm_class = getattr(layers, arm_type)
+
+        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
+        last_idx = len(backbone_out_chs) - 1
+        for i, low_chs in enumerate(backbone_out_chs):
+            # The deepest arm takes the context module output as its high-level
+            # input; every other arm takes the output of the next deeper arm.
+            high_ch = cm_out_ch if i == last_idx else arm_out_chs[i + 1]
+            arm = arm_class(
+                low_chs,
+                high_ch,
+                arm_out_chs[i],
+                ksize=3,
+                resize_mode=resize_mode)
+            self.arm_list.append(arm)
+
+        self.use_last_fuse = use_last_fuse
+        if self.use_last_fuse:
+            # One conv per deeper output, mapping it to the first output's channels.
+            self.fuse_convs = nn.LayerList()
+            for deeper_ch in arm_out_chs[1:]:
+                conv = layers.SeparableConvBNReLU(
+                    deeper_ch, arm_out_chs[0], kernel_size=3, bias_attr=False)
+                self.fuse_convs.append(conv)
+            self.last_conv = layers.SeparableConvBNReLU(
+                len(arm_out_chs) * arm_out_chs[0],
+                arm_out_chs[0],
+                kernel_size=3,
+                bias_attr=False)
+
+    def forward(self, in_feat_list):
+        """
+        Args:
+            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+        Returns:
+            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+                The length of in_feat_list and out_feat_list are the same.
+        """
+
+        high_feat = self.cm(in_feat_list[-1])
+        out_feat_list = []
+
+        # Walk from the deepest feature to the shallowest, feeding each arm's
+        # output into the next one.
+        for i in reversed(range(len(in_feat_list))):
+            high_feat = self.arm_list[i](in_feat_list[i], high_feat)
+            out_feat_list.append(high_feat)
+        # Restore shallow-to-deep order.
+        out_feat_list.reverse()
+
+        if self.use_last_fuse:
+            x_list = [out_feat_list[0]]
+            size = paddle.shape(out_feat_list[0])[2:]
+            for feat, conv in zip(out_feat_list[1:], self.fuse_convs):
+                feat = conv(feat)
+                feat = F.interpolate(
+                    feat, size=size, mode='bilinear', align_corners=False)
+                x_list.append(feat)
+            fused = paddle.concat(x_list, axis=1)
+            out_feat_list[0] = self.last_conv(fused)
+
+        return out_feat_list
+
+
+class MobileContextModule(nn.Layer):
+    """
+    Context Module for Mobile Model.
+
+    Each stage pools the input adaptively to one bin size and applies a 1x1
+    ConvBNReLU. The stage outputs are resized back to the input size, summed,
+    and passed through a 3x3 SeparableConvBNReLU.
+
+    Args:
+        in_channels (int): The number of input channels to pyramid pooling module.
+        inter_channels (int): The number of channels produced by every stage.
+        out_channels (int): The number of output channels after pyramid pooling module.
+        bin_sizes (tuple|list): The out size of pooled feature maps, one stage per entry.
+        align_corners (bool, optional): An argument of F.interpolate. Default: False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 bin_sizes,
+                 align_corners=False):
+        super().__init__()
+
+        self.stages = nn.LayerList()
+        for bin_size in bin_sizes:
+            self.stages.append(
+                self._make_stage(in_channels, inter_channels, bin_size))
+
+        self.conv_out = layers.SeparableConvBNReLU(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            bias_attr=False)
+
+        self.align_corners = align_corners
+
+    def _make_stage(self, in_channels, out_channels, size):
+        """Build one stage: adaptive average pooling to `size`, then a 1x1 ConvBNReLU."""
+        return nn.Sequential(
+            nn.AdaptiveAvgPool2D(output_size=size),
+            layers.ConvBNReLU(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1))
+
+    def forward(self, input):
+        """Sum the resized outputs of all stages and apply the output conv."""
+        input_shape = paddle.shape(input)[2:]
+        total = None
+
+        for stage in self.stages:
+            pooled = F.interpolate(
+                stage(input),
+                input_shape,
+                mode='bilinear',
+                align_corners=self.align_corners)
+            if total is not None:
+                total += pooled
+            else:
+                total = pooled
+
+        return self.conv_out(total)
+
+
+class SegHead(nn.Layer):
+    """
+    Segmentation head: a 3x3 SeparableConvBNReLU followed by a bias-free 1x1 conv.
+
+    Args:
+        in_chan (int): The number of input channels.
+        mid_chan (int): The number of channels between the two convs.
+        n_classes (int): The number of output channels.
+    """
+
+    def __init__(self, in_chan, mid_chan, n_classes):
+        super().__init__()
+        self.conv = layers.SeparableConvBNReLU(
+            in_chan,
+            mid_chan,
+            kernel_size=3,
+            bias_attr=False)
+        self.conv_out = nn.Conv2D(
+            mid_chan,
+            n_classes,
+            kernel_size=1,
+            bias_attr=False)
+
+    def forward(self, x):
+        """Apply both convs in sequence and return the logits."""
+        hidden = self.conv(x)
+        return self.conv_out(hidden)

+ 1 - 1
paddlers/models/ppseg/models/pointrend.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 3 - 13
paddlers/models/ppseg/models/portraitnet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -50,19 +50,9 @@ class PortraitNet(nn.Layer):
         self.init_weight()
         self.init_weight()
 
 
     def forward(self, x):
     def forward(self, x):
-        img = x[:, :3, :, :]
-        img_ori = x[:, 3:, :, :]
-
-        feat_list = self.backbone(img)
+        feat_list = self.backbone(x)
         logits_list = self.head(feat_list)
         logits_list = self.head(feat_list)
-
-        feat_list = self.backbone(img_ori)
-        logits_ori_list = self.head(feat_list)
-
-        return [
-            logits_list[0], logits_ori_list[0], logits_list[1],
-            logits_ori_list[1]
-        ]
+        return [logits_list]
 
 
     def init_weight(self):
     def init_weight(self):
         if self.pretrained is not None:
         if self.pretrained is not None:

+ 273 - 0
paddlers/models/ppseg/models/pp_liteseg.py

@@ -0,0 +1,273 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg import utils
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.utils import utils
+
+
+@manager.MODELS.add_component
+class PPLiteSeg(nn.Layer):
+    """
+    The PP_LiteSeg implementation based on PaddlePaddle.
+
+    The original article refers to "Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu,
+    Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang,Baohua Lai,
+    Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LiteSeg: A Superior Real-Time Semantic
+    Segmentation Model. https://arxiv.org/abs/2204.02681".
+
+    Args:
+        num_classes (int): The number of target classes.
+        backbone (nn.Layer): Backbone network. It must expose `feat_channels`
+            with more entries than the largest value in `backbone_indices`.
+        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
+            Must hold more than one index. Default: [2, 3, 4].
+        arm_type (str, optional): The name of the attention refinement module class, looked up
+            in `layers`. Default: UAFM_SpAtten.
+        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1,2,4].
+        cm_out_ch (int, optional): The output channel of the last context module. Default: 128.
+        arm_out_chs (List(int), optional): The out channels of each arm module. A single-element
+            list is repeated for every entry of `backbone_indices`. Default: [64, 96, 128].
+        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
+            A single-element list is repeated for every entry of `backbone_indices`.
+            Default: [64, 64, 64].
+        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
+            Default: bilinear.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+
+    """
+
+    # NOTE: the list defaults below are part of the public signature and are
+    # never mutated here (the single-element expansion builds new lists).
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=[2, 3, 4],
+                 arm_type='UAFM_SpAtten',
+                 cm_bin_sizes=[1, 2, 4],
+                 cm_out_ch=128,
+                 arm_out_chs=[64, 96, 128],
+                 seg_head_inter_chs=[64, 64, 64],
+                 resize_mode='bilinear',
+                 pretrained=None):
+        super().__init__()
+
+        # backbone
+        assert hasattr(backbone, 'feat_channels'), \
+            "The backbone should has feat_channels."
+        # Checked first: max() below raises a bare ValueError on an empty list.
+        assert len(backbone_indices) > 1, "The length of backbone_indices " \
+            "should be greater than 1"
+        assert len(backbone.feat_channels) >= len(backbone_indices), \
+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
+        assert len(backbone.feat_channels) > max(backbone_indices), \
+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
+        self.backbone = backbone
+
+        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
+        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
+
+        # head
+        if len(arm_out_chs) == 1:
+            arm_out_chs = arm_out_chs * len(backbone_indices)
+        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
+            "arm_out_chs and backbone_indices should be equal"
+
+        self.ppseg_head = PPLiteSegHead(backbone_out_chs, arm_out_chs,
+                                        cm_bin_sizes, cm_out_ch, arm_type,
+                                        resize_mode)
+
+        if len(seg_head_inter_chs) == 1:
+            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
+        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
+            "seg_head_inter_chs and backbone_indices should be equal"
+        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
+        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
+            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """
+        Args:
+            x (Tensor): Input batch; its trailing two dimensions are used as the
+                target size of every returned logit map.
+        Returns:
+            List(Tensor): In training mode, one resized logit map per segmentation
+                head; otherwise a single-element list holding the first head's output.
+        """
+        x_hw = paddle.shape(x)[2:]
+
+        feats_backbone = self.backbone(x)  # [x2, x4, x8, x16, x32]
+        assert len(feats_backbone) >= len(self.backbone_indices), \
+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
+
+        feats_selected = [feats_backbone[i] for i in self.backbone_indices]
+
+        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]
+
+        if self.training:
+            # Run every head first, then resize, without rebinding the input `x`.
+            logits = [
+                seg_head(feat)
+                for feat, seg_head in zip(feats_head, self.seg_heads)
+            ]
+            logit_list = [
+                F.interpolate(
+                    logit, x_hw, mode='bilinear', align_corners=False)
+                for logit in logits
+            ]
+        else:
+            # Only the first head is evaluated outside training.
+            logit = self.seg_heads[0](feats_head[0])
+            logit = F.interpolate(
+                logit, x_hw, mode='bilinear', align_corners=False)
+            logit_list = [logit]
+
+        return logit_list
+
+    def init_weight(self):
+        """Load the whole model from `self.pretrained` when it is set."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class PPLiteSegHead(nn.Layer):
+    """
+    The head of PPLiteSeg.
+
+    Args:
+        backbone_out_chs (List(int)): The channels of output tensors in the backbone.
+        arm_out_chs (List(int)): The out channels of each arm module.
+        cm_bin_sizes (List(int)): The bin size of context module.
+        cm_out_ch (int): The output channel of the last context module.
+        arm_type (str): The name of the attention refinement module class in `layers`.
+        resize_mode (str): The resize mode for the upsampling operation in decoder.
+    """
+
+    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
+                 arm_type, resize_mode):
+        super().__init__()
+
+        self.cm = PPContextModule(backbone_out_chs[-1], cm_out_ch, cm_out_ch,
+                                  cm_bin_sizes)
+
+        assert hasattr(layers, arm_type), \
+            "Not support arm_type ({})".format(arm_type)
+        # Plain attribute lookup instead of eval() on a config-supplied string.
+        arm_class = getattr(layers, arm_type)
+
+        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
+        last_idx = len(backbone_out_chs) - 1
+        for i, low_chs in enumerate(backbone_out_chs):
+            # The deepest arm takes the context module output as its high-level
+            # input; every other arm takes the output of the next deeper arm.
+            high_ch = cm_out_ch if i == last_idx else arm_out_chs[i + 1]
+            arm = arm_class(
+                low_chs,
+                high_ch,
+                arm_out_chs[i],
+                ksize=3,
+                resize_mode=resize_mode)
+            self.arm_list.append(arm)
+
+    def forward(self, in_feat_list):
+        """
+        Args:
+            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+        Returns:
+            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+                The length of in_feat_list and out_feat_list are the same.
+        """
+
+        high_feat = self.cm(in_feat_list[-1])
+        out_feat_list = []
+
+        # Walk from the deepest feature to the shallowest, feeding each arm's
+        # output into the next one.
+        for i in reversed(range(len(in_feat_list))):
+            high_feat = self.arm_list[i](in_feat_list[i], high_feat)
+            out_feat_list.append(high_feat)
+        # Restore shallow-to-deep order.
+        out_feat_list.reverse()
+
+        return out_feat_list
+
+
+class PPContextModule(nn.Layer):
+    """
+    Simple Context module.
+
+    Each stage pools the input adaptively to one bin size and applies a 1x1
+    ConvBNReLU. The stage outputs are resized back to the input size, summed,
+    and passed through a 3x3 ConvBNReLU.
+
+    Args:
+        in_channels (int): The number of input channels to pyramid pooling module.
+        inter_channels (int): The number of channels produced by every stage.
+        out_channels (int): The number of output channels after pyramid pooling module.
+        bin_sizes (tuple|list): The out size of pooled feature maps, one stage per entry.
+        align_corners (bool, optional): An argument of F.interpolate. Default: False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 bin_sizes,
+                 align_corners=False):
+        super().__init__()
+
+        self.stages = nn.LayerList()
+        for bin_size in bin_sizes:
+            self.stages.append(
+                self._make_stage(in_channels, inter_channels, bin_size))
+
+        self.conv_out = layers.ConvBNReLU(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            padding=1)
+
+        self.align_corners = align_corners
+
+    def _make_stage(self, in_channels, out_channels, size):
+        """Build one stage: adaptive average pooling to `size`, then a 1x1 ConvBNReLU."""
+        return nn.Sequential(
+            nn.AdaptiveAvgPool2D(output_size=size),
+            layers.ConvBNReLU(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1))
+
+    def forward(self, input):
+        """Sum the resized outputs of all stages and apply the output conv."""
+        input_shape = paddle.shape(input)[2:]
+        total = None
+
+        for stage in self.stages:
+            pooled = F.interpolate(
+                stage(input),
+                input_shape,
+                mode='bilinear',
+                align_corners=self.align_corners)
+            if total is not None:
+                total += pooled
+            else:
+                total = pooled
+
+        return self.conv_out(total)
+
+
+class SegHead(nn.Layer):
+    """
+    Segmentation head: a 3x3 ConvBNReLU followed by a bias-free 1x1 conv.
+
+    Args:
+        in_chan (int): The number of input channels.
+        mid_chan (int): The number of channels between the two convs.
+        n_classes (int): The number of output channels.
+    """
+
+    def __init__(self, in_chan, mid_chan, n_classes):
+        super().__init__()
+        self.conv = layers.ConvBNReLU(
+            in_chan, mid_chan, kernel_size=3, stride=1, padding=1,
+            bias_attr=False)
+        self.conv_out = nn.Conv2D(
+            mid_chan,
+            n_classes,
+            kernel_size=1,
+            bias_attr=False)
+
+    def forward(self, x):
+        """Apply both convs in sequence and return the logits."""
+        hidden = self.conv(x)
+        return self.conv_out(hidden)

+ 6 - 2
paddlers/models/ppseg/models/pphumanseg_lite.py

@@ -27,13 +27,17 @@ __all__ = ['PPHumanSegLite']
 class PPHumanSegLite(nn.Layer):
 class PPHumanSegLite(nn.Layer):
     "A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals."
     "A self-developed ultra lightweight model from paddlers.models.ppseg, is suitable for real-time scene segmentation on web or mobile terminals."
 
 
-    def __init__(self, num_classes, pretrained=None, align_corners=False):
+    def __init__(self,
+                 num_classes,
+                 in_channels=3,
+                 pretrained=None,
+                 align_corners=False):
         super().__init__()
         super().__init__()
         self.pretrained = pretrained
         self.pretrained = pretrained
         self.num_classes = num_classes
         self.num_classes = num_classes
         self.align_corners = align_corners
         self.align_corners = align_corners
 
 
-        self.conv_bn0 = _ConvBNReLU(3, 36, 3, 2, 1)
+        self.conv_bn0 = _ConvBNReLU(in_channels, 36, 3, 2, 1)
         self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0)
         self.conv_bn1 = _ConvBNReLU(36, 18, 1, 1, 0)
 
 
         self.block1 = nn.Sequential(
         self.block1 = nn.Sequential(

+ 0 - 48
paddlers/models/ppseg/models/segformer.py

@@ -127,51 +127,3 @@ class SegFormer(nn.Layer):
                 mode='bilinear',
                 mode='bilinear',
                 align_corners=self.align_corners)
                 align_corners=self.align_corners)
         ]
         ]
-
-
-@manager.MODELS.add_component
-def SegFormer_B0(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B0'](),
-        embedding_dim=256,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def SegFormer_B1(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B1'](),
-        embedding_dim=256,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def SegFormer_B2(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B2'](),
-        embedding_dim=768,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def SegFormer_B3(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B3'](),
-        embedding_dim=768,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def SegFormer_B4(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B4'](),
-        embedding_dim=768,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def SegFormer_B5(**kwargs):
-    return SegFormer(
-        backbone=manager.BACKBONES['MixVisionTransformer_B5'](),
-        embedding_dim=768,
-        **kwargs)

+ 2 - 2
paddlers/models/ppseg/models/segnet.py

@@ -32,14 +32,14 @@ class SegNet(nn.Layer):
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
     """
     """
 
 
-    def __init__(self, num_classes, pretrained=None):
+    def __init__(self, num_classes, in_channels=3, pretrained=None):
         super().__init__()
         super().__init__()
 
 
         # Encoder Module
         # Encoder Module
 
 
         self.enco1 = nn.Sequential(
         self.enco1 = nn.Sequential(
             layers.ConvBNReLU(
             layers.ConvBNReLU(
-                3, 64, 3, padding=1),
+                in_channels, 64, 3, padding=1),
             layers.ConvBNReLU(
             layers.ConvBNReLU(
                 64, 64, 3, padding=1))
                 64, 64, 3, padding=1))
 
 

+ 449 - 0
paddlers/models/ppseg/models/sinet.py

@@ -0,0 +1,449 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Refer to the original implementation: https://github.com/clovaai/c3_sinet/blob/master/models/SINet.py
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.utils import utils
+
+CFG = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]], [[3, 1], [3, 1]],
+       [[5, 1], [3, 2]], [[5, 2], [3, 4]], [[3, 1], [3, 1]], [[5, 1], [5, 1]],
+       [[3, 2], [3, 4]], [[3, 1], [5, 2]]]
+
+
+@manager.MODELS.add_component
+class SINet(nn.Layer):
+    """
+    The SINet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Hyojin Park, Lars Lowe Sjösund, YoungJoon Yoo, Nicolas Monet, Jihwan Bang, Nojun Kwak
+    "SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules
+    and Information Blocking Decoder", (https://arxiv.org/abs/1911.09099).
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        config (List, optional): The config for SINet. Default: CFG.
+        stage2_blocks (int, optional): The num of blocks in stage2. Default: 2.
+        stage3_blocks (int, optional): The num of blocks in stage3. Default: 8.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes=2,
+                 config=CFG,
+                 stage2_blocks=2,
+                 stage3_blocks=8,
+                 in_channels=3,
+                 pretrained=None):
+        super().__init__()
+        dim1 = 16
+        dim2 = 48
+        dim3 = 96
+
+        self.encoder = SINetEncoder(config, in_channels, num_classes,
+                                    stage2_blocks, stage3_blocks)
+
+        self.up = nn.UpsamplingBilinear2D(scale_factor=2)
+        self.bn_3 = nn.BatchNorm(num_classes)
+
+        self.level2_C = CBR(dim2, num_classes, 1, 1)
+        self.bn_2 = nn.BatchNorm(num_classes)
+
+        self.classifier = nn.Sequential(
+            nn.UpsamplingBilinear2D(scale_factor=2),
+            nn.Conv2D(
+                num_classes, num_classes, 3, 1, 1, bias_attr=False))
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, input):
+        output1 = self.encoder.level1(input)  # x2
+
+        output2_0 = self.encoder.level2_0(output1)  # x4
+        for i, layer in enumerate(self.encoder.level2):
+            if i == 0:
+                output2 = layer(output2_0)
+            else:
+                output2 = layer(output2)
+        output2_cat = self.encoder.BR2(paddle.concat([output2_0, output2], 1))
+
+        output3_0 = self.encoder.level3_0(output2_cat)  # x8
+        for i, layer in enumerate(self.encoder.level3):
+            if i == 0:
+                output3 = layer(output3_0)
+            else:
+                output3 = layer(output3)
+        output3_cat = self.encoder.BR3(paddle.concat([output3_0, output3], 1))
+        enc_final = self.encoder.classifier(output3_cat)  # x8
+
+        dec_stage1 = self.bn_3(self.up(enc_final))  # x4
+        stage1_confidence = paddle.max(F.softmax(dec_stage1), axis=1)
+        stage1_gate = (1 - stage1_confidence).unsqueeze(1)
+
+        dec_stage2_0 = self.level2_C(output2)  # x4
+        dec_stage2 = self.bn_2(
+            self.up(dec_stage2_0 * stage1_gate + dec_stage1))  # x2
+
+        out = self.classifier(dec_stage2)  # x
+
+        return [out]
+
+
+def channel_shuffle(x, groups):
+    x_shape = paddle.shape(x)
+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
+    num_channels = x.shape[1]
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = paddle.reshape(
+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
+
+    # transpose
+    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
+
+    # flatten
+    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
+
+    return x
+
+
+class CBR(nn.Layer):
+    '''
+    This class defines the convolution layer with batch normalization and PReLU activation
+    '''
+
+    def __init__(self, nIn, nOut, kSize, stride=1):
+        super().__init__()
+        padding = int((kSize - 1) / 2)
+
+        self.conv = nn.Conv2D(
+            nIn,
+            nOut, (kSize, kSize),
+            stride=stride,
+            padding=(padding, padding),
+            bias_attr=False)
+        self.bn = nn.BatchNorm(nOut)
+        self.act = nn.PReLU(nOut)
+
+    def forward(self, input):
+        output = self.conv(input)
+        output = self.bn(output)
+        output = self.act(output)
+        return output
+
+
+class SeparableCBR(nn.Layer):
+    '''
+    This class defines the depthwise separable convolution layer with batch normalization and PReLU activation
+    '''
+
+    def __init__(self, nIn, nOut, kSize, stride=1):
+        super().__init__()
+        padding = int((kSize - 1) / 2)
+
+        self.conv = nn.Sequential(
+            nn.Conv2D(
+                nIn,
+                nIn, (kSize, kSize),
+                stride=stride,
+                padding=(padding, padding),
+                groups=nIn,
+                bias_attr=False),
+            nn.Conv2D(
+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
+        self.bn = nn.BatchNorm(nOut)
+        self.act = nn.PReLU(nOut)
+
+    def forward(self, input):
+        output = self.conv(input)
+        output = self.bn(output)
+        output = self.act(output)
+        return output
+
+
+class SqueezeBlock(nn.Layer):
+    def __init__(self, exp_size, divide=4.0):
+        super(SqueezeBlock, self).__init__()
+
+        if divide > 1:
+            self.dense = nn.Sequential(
+                nn.Linear(exp_size, int(exp_size / divide)),
+                nn.PReLU(int(exp_size / divide)),
+                nn.Linear(int(exp_size / divide), exp_size),
+                nn.PReLU(exp_size), )
+        else:
+            self.dense = nn.Sequential(
+                nn.Linear(exp_size, exp_size), nn.PReLU(exp_size))
+
+    def forward(self, x):
+        alpha = F.adaptive_avg_pool2d(x, [1, 1])
+        alpha = paddle.squeeze(alpha, axis=[2, 3])
+        alpha = self.dense(alpha)
+        alpha = paddle.unsqueeze(alpha, axis=[2, 3])
+        out = x * alpha
+        return out
+
+
+class SESeparableCBR(nn.Layer):
+    '''
+    This class defines the depthwise separable convolution layer with a squeeze block, batch normalization and PReLU activation
+    '''
+
+    def __init__(self, nIn, nOut, kSize, stride=1, divide=2.0):
+        super().__init__()
+        padding = int((kSize - 1) / 2)
+
+        self.conv = nn.Sequential(
+            nn.Conv2D(
+                nIn,
+                nIn, (kSize, kSize),
+                stride=stride,
+                padding=(padding, padding),
+                groups=nIn,
+                bias_attr=False),
+            SqueezeBlock(
+                nIn, divide=divide),
+            nn.Conv2D(
+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
+
+        self.bn = nn.BatchNorm(nOut)
+        self.act = nn.PReLU(nOut)
+
+    def forward(self, input):
+        output = self.conv(input)
+        output = self.bn(output)
+        output = self.act(output)
+        return output
+
+
+class BR(nn.Layer):
+    '''
+    This class groups the batch normalization and PReLU activation
+    '''
+
+    def __init__(self, nOut):
+        super().__init__()
+        self.bn = nn.BatchNorm(nOut)
+        self.act = nn.PReLU(nOut)
+
+    def forward(self, input):
+        output = self.bn(input)
+        output = self.act(output)
+        return output
+
+
+class CB(nn.Layer):
+    '''
+    This class groups the convolution and batch normalization
+    '''
+
+    def __init__(self, nIn, nOut, kSize, stride=1):
+        super().__init__()
+        padding = int((kSize - 1) / 2)
+        self.conv = nn.Conv2D(
+            nIn,
+            nOut, (kSize, kSize),
+            stride=stride,
+            padding=(padding, padding),
+            bias_attr=False)
+        self.bn = nn.BatchNorm(nOut)
+
+    def forward(self, input):
+        output = self.conv(input)
+        output = self.bn(output)
+        return output
+
+
+class C(nn.Layer):
+    '''
+    This class is for a convolutional layer.
+    '''
+
+    def __init__(self, nIn, nOut, kSize, stride=1, group=1):
+        super().__init__()
+        padding = int((kSize - 1) / 2)
+        self.conv = nn.Conv2D(
+            nIn,
+            nOut, (kSize, kSize),
+            stride=stride,
+            padding=(padding, padding),
+            bias_attr=False,
+            groups=group)
+
+    def forward(self, input):
+        output = self.conv(input)
+        return output
+
+
+class S2block(nn.Layer):
+    '''
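+    This class defines a depthwise convolution block that optionally runs at reduced resolution (average pooling before, bilinear upsampling after).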
+    '''
+
+    def __init__(self, nIn, nOut, kSize, avgsize):
+        super().__init__()
+
+        self.resolution_down = False
+        if avgsize > 1:
+            self.resolution_down = True
+            self.down_res = nn.AvgPool2D(avgsize, avgsize)
+            self.up_res = nn.UpsamplingBilinear2D(scale_factor=avgsize)
+            self.avgsize = avgsize
+
+        padding = int((kSize - 1) / 2)
+        self.conv = nn.Sequential(
+            nn.Conv2D(
+                nIn,
+                nIn,
+                kernel_size=(kSize, kSize),
+                stride=1,
+                padding=(padding, padding),
+                groups=nIn,
+                bias_attr=False),
+            nn.BatchNorm(nIn))
+
+        self.act_conv1x1 = nn.Sequential(
+            nn.PReLU(nIn),
+            nn.Conv2D(
+                nIn, nOut, kernel_size=1, stride=1, bias_attr=False), )
+
+        self.bn = nn.BatchNorm(nOut)
+
+    def forward(self, input):
+        if self.resolution_down:
+            input = self.down_res(input)
+        output = self.conv(input)
+
+        output = self.act_conv1x1(output)
+        if self.resolution_down:
+            output = self.up_res(output)
+        return self.bn(output)
+
+
+class S2module(nn.Layer):
+    '''
+    This class defines the ESP block, which is based on the following principle
+        Reduce ---> Split ---> Transform --> Merge
+    '''
+
+    def __init__(self, nIn, nOut, add=True, config=[[3, 1], [5, 1]]):
+        super().__init__()
+
+        group_n = len(config)
+        assert group_n == 2
+        n = int(nOut / group_n)
+        n1 = nOut - group_n * n
+
+        self.c1 = C(nIn, n, 1, 1, group=group_n)
+        # self.c1 = C(nIn, n, 1, 1)
+
+        for i in range(group_n):
+            if i == 0:
+                self.layer_0 = S2block(
+                    n, n + n1, kSize=config[i][0], avgsize=config[i][1])
+            else:
+                self.layer_1 = S2block(
+                    n, n, kSize=config[i][0], avgsize=config[i][1])
+
+        self.BR = BR(nOut)
+        self.add = add
+        self.group_n = group_n
+
+    def forward(self, input):
+        output1 = self.c1(input)
+        output1 = channel_shuffle(output1, self.group_n)
+        res_0 = self.layer_0(output1)
+        res_1 = self.layer_1(output1)
+        combine = paddle.concat([res_0, res_1], 1)
+
+        if self.add:
+            combine = input + combine
+        output = self.BR(combine)
+        return output
+
+
+class SINetEncoder(nn.Layer):
+    def __init__(self,
+                 config,
+                 in_channels=3,
+                 num_classes=2,
+                 stage2_blocks=2,
+                 stage3_blocks=8):
+        super().__init__()
+        assert stage2_blocks == 2
+        dim1 = 16
+        dim2 = 48
+        dim3 = 96
+
+        self.level1 = CBR(in_channels, 12, 3, 2)
+
+        self.level2_0 = SESeparableCBR(12, dim1, 3, 2, divide=1)
+
+        self.level2 = nn.LayerList()
+        for i in range(0, stage2_blocks):
+            if i == 0:
+                self.level2.append(
+                    S2module(
+                        dim1, dim2, config=config[i], add=False))
+            else:
+                self.level2.append(S2module(dim2, dim2, config=config[i]))
+        self.BR2 = BR(dim2 + dim1)
+
+        self.level3_0 = SESeparableCBR(dim2 + dim1, dim2, 3, 2, divide=2)
+        self.level3 = nn.LayerList()
+        for i in range(0, stage3_blocks):
+            if i == 0:
+                self.level3.append(
+                    S2module(
+                        dim2, dim3, config=config[2 + i], add=False))
+            else:
+                self.level3.append(S2module(dim3, dim3, config=config[2 + i]))
+        self.BR3 = BR(dim3 + dim2)
+
+        self.classifier = C(dim3 + dim2, num_classes, 1, 1)
+
+    def forward(self, input):
+        output1 = self.level1(input)  # x2
+
+        output2_0 = self.level2_0(output1)  # x4
+        for i, layer in enumerate(self.level2):
+            if i == 0:
+                output2 = layer(output2_0)
+            else:
+                output2 = layer(output2)
+
+        output3_0 = self.level3_0(
+            self.BR2(paddle.concat([output2_0, output2], 1)))  # x8
+        for i, layer in enumerate(self.level3):
+            if i == 0:
+                output3 = layer(output3_0)
+            else:
+                output3 = layer(output3)
+
+        output3_cat = self.BR3(paddle.concat([output3_0, output3], 1))
+        classifier = self.classifier(output3_cat)
+        return classifier

+ 1 - 1
paddlers/models/ppseg/models/stdcseg.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 155 - 0
paddlers/models/ppseg/models/topformer.py

@@ -0,0 +1,155 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.models import layers
+from paddlers.models.ppseg.utils import utils
+from paddlers.models.ppseg.models.backbones.top_transformer import ConvBNAct
+
+
+@manager.MODELS.add_component
+class TopFormer(nn.Layer):
+    """
+    The Token Pyramid Transformer(TopFormer) implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhang, Wenqiang, Zilong Huang, Guozhong Luo, Tao Chen, Xinggang Wang, Wenyu Liu, Gang Yu,
+    and Chunhua Shen. "TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation." 
+    In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition,
+    pp. 12083-12093. 2022.
+
+    This model refers to https://github.com/hustvl/TopFormer.
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (nn.Layer): Backbone network.
+        head_use_dw (bool, optional): Whether the head use depthwise convolutions. Default: False.
+        align_corners (bool, optional): Set the align_corners in resizing. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 head_use_dw=False,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+        self.backbone = backbone
+
+        head_in_channels = [
+            i for i in backbone.injection_out_channels if i is not None
+        ]
+        self.decode_head = TopFormerHead(
+            num_classes=num_classes,
+            in_channels=head_in_channels,
+            use_dw=head_use_dw,
+            align_corners=align_corners)
+
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, x):
+        x_hw = paddle.shape(x)[2:]
+        x = self.backbone(x)  # len=3, 1/8,1/16,1/32
+        x = self.decode_head(x)
+        x = F.interpolate(
+            x, x_hw, mode='bilinear', align_corners=self.align_corners)
+
+        return [x]
+
+
+class TopFormerHead(nn.Layer):
+    def __init__(self,
+                 num_classes,
+                 in_channels,
+                 in_index=[0, 1, 2],
+                 in_transform='multiple_select',
+                 use_dw=False,
+                 dropout_ratio=0.1,
+                 align_corners=False):
+        super().__init__()
+
+        self.in_index = in_index
+        self.in_transform = in_transform
+        self.align_corners = align_corners
+
+        self._init_inputs(in_channels, in_index, in_transform)
+        self.linear_fuse = ConvBNAct(
+            in_channels=self.last_channels,
+            out_channels=self.last_channels,
+            kernel_size=1,
+            stride=1,
+            groups=self.last_channels if use_dw else 1,
+            act=nn.ReLU)
+        self.dropout = nn.Dropout2D(dropout_ratio)
+        self.conv_seg = nn.Conv2D(
+            self.last_channels, num_classes, kernel_size=1)
+
+    def _init_inputs(self, in_channels, in_index, in_transform):
+        assert in_transform in [None, 'resize_concat', 'multiple_select']
+        if in_transform is not None:
+            assert len(in_channels) == len(in_index)
+            if in_transform == 'resize_concat':
+                self.last_channels = sum(in_channels)
+            else:
+                self.last_channels = in_channels[0]
+        else:
+            assert isinstance(in_channels, int)
+            assert isinstance(in_index, int)
+            self.last_channels = in_channels
+
+    def _transform_inputs(self, inputs):
+        if self.in_transform == 'resize_concat':
+            inputs = [inputs[i] for i in self.in_index]
+            inputs = [
+                F.interpolate(
+                    input_data=x,
+                    size=paddle.shape(inputs[0])[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners) for x in inputs
+            ]
+            inputs = paddle.concat(inputs, axis=1)
+        elif self.in_transform == 'multiple_select':
+            inputs_tmp = [inputs[i] for i in self.in_index]
+            inputs = inputs_tmp[0]
+            for x in inputs_tmp[1:]:
+                x = F.interpolate(
+                    x,
+                    size=paddle.shape(inputs)[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners)
+                inputs += x
+        else:
+            inputs = inputs[self.in_index]
+
+        return inputs
+
+    def forward(self, x):
+        x = self._transform_inputs(x)
+        x = self.linear_fuse(x)
+        x = self.dropout(x)
+        x = self.conv_seg(x)
+        return x

+ 5 - 5
paddlers/models/ppseg/models/u2net.py

@@ -34,15 +34,15 @@ class U2Net(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
-        in_ch (int, optional): Input channels. Default: 3.
+        in_channels (int, optional): Input channels. Default: 3.
         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
 
 
     """
     """
 
 
-    def __init__(self, num_classes, in_ch=3, pretrained=None):
+    def __init__(self, num_classes, in_channels=3, pretrained=None):
         super(U2Net, self).__init__()
         super(U2Net, self).__init__()
 
 
-        self.stage1 = RSU7(in_ch, 32, 64)
+        self.stage1 = RSU7(in_channels, 32, 64)
         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
 
 
         self.stage2 = RSU6(64, 32, 128)
         self.stage2 = RSU6(64, 32, 128)
@@ -153,10 +153,10 @@ class U2Net(nn.Layer):
 class U2Netp(nn.Layer):
 class U2Netp(nn.Layer):
     """Please Refer to U2Net above."""
     """Please Refer to U2Net above."""
 
 
-    def __init__(self, num_classes, in_ch=3, pretrained=None):
+    def __init__(self, num_classes, in_channels=3, pretrained=None):
         super(U2Netp, self).__init__()
         super(U2Netp, self).__init__()
 
 
-        self.stage1 = RSU7(in_ch, 16, 64)
+        self.stage1 = RSU7(in_channels, 16, 64)
         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
         self.pool12 = nn.MaxPool2D(2, stride=2, ceil_mode=True)
 
 
         self.stage2 = RSU6(64, 16, 64)
         self.stage2 = RSU6(64, 16, 64)

+ 5 - 5
paddlers/models/ppseg/models/unet.py

@@ -36,18 +36,19 @@ class UNet(nn.Layer):
             is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.  Default: False.
             is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.  Default: False.
         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
             If False, use resize_bilinear. Default: False.
             If False, use resize_bilinear. Default: False.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
         pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
-                 input_channel=3,
                  align_corners=False,
                  align_corners=False,
                  use_deconv=False,
                  use_deconv=False,
+                 in_channels=3,
                  pretrained=None):
                  pretrained=None):
         super().__init__()
         super().__init__()
 
 
-        self.encode = Encoder(input_channel)
+        self.encode = Encoder(in_channels)
         self.decode = Decoder(align_corners, use_deconv=use_deconv)
         self.decode = Decoder(align_corners, use_deconv=use_deconv)
         self.cls = self.conv = nn.Conv2D(
         self.cls = self.conv = nn.Conv2D(
             in_channels=64,
             in_channels=64,
@@ -73,12 +74,11 @@ class UNet(nn.Layer):
 
 
 
 
 class Encoder(nn.Layer):
 class Encoder(nn.Layer):
-    def __init__(self, input_channel=3):
+    def __init__(self, in_channels=3):
         super().__init__()
         super().__init__()
 
 
         self.double_conv = nn.Sequential(
         self.double_conv = nn.Sequential(
-            layers.ConvBNReLU(input_channel, 64, 3),
-            layers.ConvBNReLU(64, 64, 3))
+            layers.ConvBNReLU(in_channels, 64, 3), layers.ConvBNReLU(64, 64, 3))
         down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
         down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]]
         self.down_sample_list = nn.LayerList([
         self.down_sample_list = nn.LayerList([
             self.down_sampling(channel[0], channel[1])
             self.down_sampling(channel[0], channel[1])

+ 2 - 2
paddlers/models/ppseg/models/unet_plusplus.py

@@ -31,8 +31,8 @@ class UNetPlusPlus(nn.Layer):
     (https://arxiv.org/abs/1807.10165).
     (https://arxiv.org/abs/1807.10165).
 
 
     Args:
     Args:
-        in_channels (int): The channel number of input image.
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channel number of input image. Default: 3.
         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
         use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling.
             If False, use resize_bilinear. Default: False.
             If False, use resize_bilinear. Default: False.
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@@ -42,8 +42,8 @@ class UNetPlusPlus(nn.Layer):
         """
         """
 
 
     def __init__(self,
     def __init__(self,
-                 in_channels,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  use_deconv=False,
                  use_deconv=False,
                  align_corners=False,
                  align_corners=False,
                  pretrained=None,
                  pretrained=None,

+ 173 - 0
paddlers/models/ppseg/models/upernet.py

@@ -0,0 +1,173 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppseg import utils
+from paddlers.models.ppseg.cvlibs import manager
+from paddlers.models.ppseg.models import layers
+
+
+@manager.MODELS.add_component
+class UPerNet(nn.Layer):
+    """
+    The UPerNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Tete Xiao, et al. "Unified Perceptual Parsing for Scene Understanding"
+    (https://arxiv.org/abs/1807.10221).
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (Paddle.nn.Layer): Backbone network, currently supports ResNet50/101.
+        backbone_indices (tuple): Four values in the tuple indicate the indices of output of backbone.
+        channels (int): The channels of inter layers. Default: 512.
+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: False.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        dropout_prob (float): Dropout ratio for upernet head. Default: 0.1.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices,
+                 channels=512,
+                 enable_auxiliary_loss=False,
+                 align_corners=False,
+                 dropout_prob=0.1,
+                 pretrained=None):
+        super().__init__()
+        self.backbone = backbone
+        self.backbone_indices = backbone_indices
+        self.in_channels = [
+            self.backbone.feat_channels[i] for i in backbone_indices
+        ]
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        fpn_inplanes = [
+            self.backbone.feat_channels[i] for i in backbone_indices
+        ]
+        self.head = UPerNetHead(
+            num_classes=num_classes,
+            fpn_inplanes=fpn_inplanes,
+            dropout_prob=dropout_prob,
+            channels=channels,
+            enable_auxiliary_loss=self.enable_auxiliary_loss)
+        self.init_weight()
+
+    def forward(self, x):
+        feats = self.backbone(x)
+        feats = [feats[i] for i in self.backbone_indices]
+        logit_list = self.head(feats)
+        logit_list = [
+            F.interpolate(
+                logit,
+                paddle.shape(x)[2:],
+                mode='bilinear',
+                align_corners=self.align_corners) for logit in logit_list
+        ]
+        return logit_list
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class UPerNetHead(nn.Layer):
+    def __init__(self,
+                 num_classes,
+                 fpn_inplanes,
+                 channels,
+                 dropout_prob=0.1,
+                 enable_auxiliary_loss=False,
+                 align_corners=True):
+        super(UPerNetHead, self).__init__()
+        self.align_corners = align_corners
+        self.ppm = layers.PPModule(
+            in_channels=fpn_inplanes[-1],
+            out_channels=channels,
+            bin_sizes=(1, 2, 3, 6),
+            dim_reduction=True,
+            align_corners=True)
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        self.lateral_convs = nn.LayerList()
+        self.fpn_convs = nn.LayerList()
+
+        for fpn_inplane in fpn_inplanes[:-1]:
+            self.lateral_convs.append(
+                layers.ConvBNReLU(fpn_inplane, channels, 1))
+            self.fpn_convs.append(
+                layers.ConvBNReLU(
+                    channels, channels, 3, bias_attr=False))
+
+        if self.enable_auxiliary_loss:
+            self.aux_head = layers.AuxLayer(
+                fpn_inplanes[2],
+                fpn_inplanes[2],
+                num_classes,
+                dropout_prob=dropout_prob)
+
+        self.fpn_bottleneck = layers.ConvBNReLU(
+            len(fpn_inplanes) * channels, channels, 3, padding=1)
+
+        self.conv_last = nn.Sequential(
+            layers.ConvBNReLU(
+                len(fpn_inplanes) * channels, channels, 3, bias_attr=False),
+            nn.Conv2D(
+                channels, num_classes, kernel_size=1))
+        self.conv_seg = nn.Conv2D(channels, num_classes, kernel_size=1)
+
+    def forward(self, inputs):
+        laterals = []
+        for i, lateral_conv in enumerate(self.lateral_convs):
+            laterals.append(lateral_conv(inputs[i]))
+
+        laterals.append(self.ppm(inputs[-1]))
+        fpn_levels = len(laterals)
+        for i in range(fpn_levels - 1, 0, -1):
+            prev_shape = paddle.shape(laterals[i - 1])
+            laterals[i - 1] = laterals[i - 1] + F.interpolate(
+                laterals[i],
+                size=prev_shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+
+        fpn_outs = []
+        for i in range(fpn_levels - 1):
+            fpn_outs.append(self.fpn_convs[i](laterals[i]))
+        fpn_outs.append(laterals[-1])
+
+        for i in range(fpn_levels - 1, 0, -1):
+            fpn_outs[i] = F.interpolate(
+                fpn_outs[i],
+                size=paddle.shape(fpn_outs[0])[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+        fuse_out = paddle.concat(fpn_outs, axis=1)
+        x = self.fpn_bottleneck(fuse_out)
+
+        x = self.conv_seg(x)
+        logits_list = [x]
+        if self.enable_auxiliary_loss:
+            aux_out = self.aux_head(inputs[2])
+            logits_list.append(aux_out)
+            return logits_list
+        else:
+            return logits_list

+ 8 - 1
paddlers/models/ppseg/transforms/functional.py

@@ -15,7 +15,14 @@
 import cv2
 import cv2
 import numpy as np
 import numpy as np
 from PIL import Image, ImageEnhance
 from PIL import Image, ImageEnhance
-from scipy.ndimage.morphology import distance_transform_edt
+from scipy.ndimage import distance_transform_edt
+
+
+def rescale_size(img_size, target_size):
+    """
+    Scale `img_size` by one common factor derived from `target_size`.
+
+    The factor is the smaller of two ratios: largest target entry over largest
+    image entry, and smallest target entry over smallest image entry.
+
+    Args:
+        img_size (list|tuple): The sizes to rescale.
+        target_size (list|tuple): The sizes the ratios are computed against.
+
+    Returns:
+        list: Each entry of `img_size` multiplied by the factor and rounded.
+        float: The factor itself.
+    """
+    scale = min(
+        max(target_size) / max(img_size), min(target_size) / min(img_size))
+    rescaled_size = [round(i * scale) for i in img_size]
+    return rescaled_size, scale
 
 
 
 
 def normalize(im, mean, std):
 def normalize(im, mean, std):

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 261 - 374
paddlers/models/ppseg/transforms/transforms.py


+ 0 - 1
paddlers/models/ppseg/utils/__init__.py

@@ -19,5 +19,4 @@ from .env import seg_env, get_sys_env
 from .utils import *
 from .utils import *
 from .timer import TimeAverager, calculate_eta
 from .timer import TimeAverager, calculate_eta
 from . import visualize
 from . import visualize
-from .config_check import config_check
 from .ema import EMA
 from .ema import EMA

+ 0 - 59
paddlers/models/ppseg/utils/config_check.py

@@ -1,59 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-
-def config_check(cfg, train_dataset=None, val_dataset=None):
-    """
-    To check config。
-
-    Args:
-        cfg (paddleseg.cvlibs.Config): An object of paddleseg.cvlibs.Config.
-        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
-        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
-    """
-
-    num_classes_check(cfg, train_dataset, val_dataset)
-
-
-def num_classes_check(cfg, train_dataset, val_dataset):
-    """"
-    Check that the num_classes in model, train_dataset and val_dataset is consistent.
-    """
-    num_classes_set = set()
-    if train_dataset and hasattr(train_dataset, 'num_classes'):
-        num_classes_set.add(train_dataset.num_classes)
-    if val_dataset and hasattr(val_dataset, 'num_classes'):
-        num_classes_set.add(val_dataset.num_classes)
-    if cfg.dic.get('model', None) and cfg.dic['model'].get('num_classes', None):
-        num_classes_set.add(cfg.dic['model'].get('num_classes'))
-    if (not cfg.train_dataset) and (not cfg.val_dataset):
-        raise ValueError(
-            'One of `train_dataset` or `val_dataset should be given, but there are none.'
-        )
-    if len(num_classes_set) == 0:
-        raise ValueError(
-            '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
-        )
-    elif len(num_classes_set) > 1:
-        raise ValueError(
-            '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
-            .format(num_classes_set))
-    else:
-        num_classes = num_classes_set.pop()
-        if train_dataset:
-            train_dataset.num_classes = num_classes
-        if val_dataset:
-            val_dataset.num_classes = num_classes

+ 1 - 1
paddlers/models/ppseg/utils/env/__init__.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022  PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License"
 # Licensed under the Apache License, Version 2.0 (the "License"
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 1 - 1
paddlers/models/ppseg/utils/env/seg_env.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022  PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License"
 # Licensed under the Apache License, Version 2.0 (the "License"
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 9 - 3
paddlers/models/ppseg/utils/env/sys_env.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@ import sys
 
 
 import cv2
 import cv2
 import paddle
 import paddle
+import paddlers.models.ppseg as ppseg
 
 
 IS_WINDOWS = sys.platform == 'win32'
 IS_WINDOWS = sys.platform == 'win32'
 
 
@@ -57,8 +58,12 @@ def _get_nvcc_info(cuda_home):
     if cuda_home is not None and os.path.isdir(cuda_home):
     if cuda_home is not None and os.path.isdir(cuda_home):
         try:
         try:
             nvcc = os.path.join(cuda_home, 'bin/nvcc')
             nvcc = os.path.join(cuda_home, 'bin/nvcc')
-            nvcc = subprocess.check_output(
-                "{} -V".format(nvcc), shell=True).decode()
+            if not IS_WINDOWS:
+                nvcc = subprocess.check_output(
+                    "{} -V".format(nvcc), shell=True).decode()
+            else:
+                nvcc = subprocess.check_output(
+                    "\"{}\" -V".format(nvcc), shell=True).decode()
             nvcc = nvcc.strip().split('\n')[-1]
             nvcc = nvcc.strip().split('\n')[-1]
         except subprocess.SubprocessError:
         except subprocess.SubprocessError:
             nvcc = "Not Available"
             nvcc = "Not Available"
@@ -116,6 +121,7 @@ def get_sys_env():
     except:
     except:
         pass
         pass
 
 
+    env_info['PaddleSeg'] = ppseg.__version__
     env_info['PaddlePaddle'] = paddle.__version__
     env_info['PaddlePaddle'] = paddle.__version__
     env_info['OpenCV'] = cv2.__version__
     env_info['OpenCV'] = cv2.__version__
 
 

+ 36 - 34
paddlers/models/ppseg/utils/metrics.py

@@ -135,37 +135,6 @@ def mean_iou(intersect_area, pred_area, label_area):
     return np.array(class_iou), miou
     return np.array(class_iou), miou
 
 
 
 
-def fwiou(intersect_area, pred_area, label_area):
-    """
-    Calculate iou.
-
-    Args:
-        intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
-        pred_area (Tensor): The prediction area on all classes.
-        label_area (Tensor): The ground truth area on all classes.
-
-    Returns:
-        np.ndarray: iou on all classes.
-        float: Frequency Weighted iou of all classes.
-        np.ndarray: Frequency of all classes.
-    """
-    intersect_area = intersect_area.numpy()
-    pred_area = pred_area.numpy()
-    label_area = label_area.numpy()
-    union = pred_area + label_area - intersect_area
-    class_iou = []
-    for i in range(len(intersect_area)):
-        if union[i] == 0:
-            iou = 0
-        else:
-            iou = intersect_area[i] / union[i]
-        class_iou.append(iou)
-    fw = label_area / np.sum(label_area)
-    fwious = np.array(fw) * np.array(class_iou)
-    fwiou = np.sum(fwious)
-    return np.array(class_iou), fwiou, fw
-
-
 def dice(intersect_area, pred_area, label_area):
 def dice(intersect_area, pred_area, label_area):
     """
     """
     Calculate DICE.
     Calculate DICE.
@@ -194,6 +163,7 @@ def dice(intersect_area, pred_area, label_area):
     return np.array(class_dice), mdice
     return np.array(class_dice), mdice
 
 
 
 
+# This is a deprecated function, please use class_measurement function.
 def accuracy(intersect_area, pred_area):
 def accuracy(intersect_area, pred_area):
     """
     """
     Calculate accuracy
     Calculate accuracy
@@ -219,6 +189,38 @@ def accuracy(intersect_area, pred_area):
     return np.array(class_acc), macc
     return np.array(class_acc), macc
 
 
 
 
+def class_measurement(intersect_area, pred_area, label_area):
+    """
+    Calculate accuracy, class precision and class recall.
+
+    Args:
+        intersect_area (Tensor): The intersection area of prediction and ground truth on all classes.
+        pred_area (Tensor): The prediction area on all classes.
+        label_area (Tensor): The ground truth area on all classes.
+
+    Returns:
+        float: The mean accuracy.
+        np.ndarray: The precision of all classes.
+        np.ndarray: The recall of all classes.
+    """
+    intersect_area = intersect_area.numpy()
+    pred_area = pred_area.numpy()
+    label_area = label_area.numpy()
+
+    # Total intersection over total predicted area.
+    # NOTE(review): unlike the per-class values below, this division has no
+    # guard for a zero denominator.
+    mean_acc = np.sum(intersect_area) / np.sum(pred_area)
+    class_precision = []
+    class_recall = []
+    for i in range(len(intersect_area)):
+        # A class with no predicted (or no labelled) area scores 0 instead of
+        # dividing by zero.
+        precision = 0 if pred_area[i] == 0 \
+            else intersect_area[i] / pred_area[i]
+        recall = 0 if label_area[i] == 0 \
+            else intersect_area[i] / label_area[i]
+        class_precision.append(precision)
+        class_recall.append(recall)
+
+    return mean_acc, np.array(class_precision), np.array(class_recall)
+
+
 def kappa(intersect_area, pred_area, label_area):
 def kappa(intersect_area, pred_area, label_area):
     """
     """
     Calculate kappa coefficient
     Calculate kappa coefficient
@@ -231,9 +233,9 @@ def kappa(intersect_area, pred_area, label_area):
     Returns:
     Returns:
         float: kappa coefficient.
         float: kappa coefficient.
     """
     """
-    intersect_area = intersect_area.numpy()
-    pred_area = pred_area.numpy()
-    label_area = label_area.numpy()
+    intersect_area = intersect_area.numpy().astype(np.float64)
+    pred_area = pred_area.numpy().astype(np.float64)
+    label_area = label_area.numpy().astype(np.float64)
     total_area = np.sum(label_area)
     total_area = np.sum(label_area)
     po = np.sum(intersect_area) / total_area
     po = np.sum(intersect_area) / total_area
     pe = np.sum(pred_area * label_area) / (total_area * total_area)
     pe = np.sum(pred_area * label_area) / (total_area * total_area)

+ 1 - 1
paddlers/models/ppseg/utils/train_profiler.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 2 - 0
paddlers/models/ppseg/utils/utils.py

@@ -160,6 +160,8 @@ def get_image_list(image_path):
             for f in files:
             for f in files:
                 if '.ipynb_checkpoints' in root:
                 if '.ipynb_checkpoints' in root:
                     continue
                     continue
+                if f.startswith('.'):
+                    continue
                 if os.path.splitext(f)[-1] in valid_suffix:
                 if os.path.splitext(f)[-1] in valid_suffix:
                     image_list.append(os.path.join(root, f))
                     image_list.append(os.path.join(root, f))
     else:
     else:

+ 39 - 1
paddlers/models/ppseg/utils/visualize.py

@@ -63,7 +63,7 @@ def get_pseudo_color_map(pred, color_map=None):
         pred (numpy.ndarray): the origin predicted image.
         pred (numpy.ndarray): the origin predicted image.
         color_map (list, optional): the palette color map. Default: None,
         color_map (list, optional): the palette color map. Default: None,
             use paddleseg's default color map.
             use paddleseg's default color map.
-    
+
     Returns:
     Returns:
         (numpy.ndarray): the pseduo image.
         (numpy.ndarray): the pseduo image.
     """
     """
@@ -103,3 +103,41 @@ def get_color_map_list(num_classes, custom_color=None):
     if custom_color:
     if custom_color:
         color_map[:len(custom_color)] = custom_color
         color_map[:len(custom_color)] = custom_color
     return color_map
     return color_map
+
+
+def paste_images(image_list):
+    """
+    Paste several same-sized images side by side into one image.
+
+    Args:
+        image_list (List or Tuple): At least two images of identical size. Each
+            entry may be a file path (str), a numpy.ndarray, or an object that
+            already offers the PIL image interface (`size`, `mode`).
+
+    Returns:
+        result_img (PIL.Image): The inputs pasted left to right in list order.
+    """
+    assert isinstance(image_list,
+                      (list, tuple)), "image_list should be a list or tuple"
+    assert len(
+        image_list) > 1, "The length of image_list should be greater than 1"
+
+    pil_img_list = []
+    for img in image_list:
+        if isinstance(img, str):
+            assert os.path.exists(img), "The image is not existed: {}".format(
+                img)
+            # Keep the opened file as a PIL image. Turning it into an ndarray
+            # here would skip the `fromarray` branch below and break the
+            # `size` unpacking and `paste` calls further down.
+            img = PILImage.open(img)
+        elif isinstance(img, np.ndarray):
+            img = PILImage.fromarray(img)
+        pil_img_list.append(img)
+
+    sample_img = pil_img_list[0]
+    size = sample_img.size
+    for img in pil_img_list:
+        assert size == img.size, "The image size in image_list should be the same"
+
+    width, height = sample_img.size
+    # The canvas is as tall as one image and wide enough for all of them.
+    result_img = PILImage.new(sample_img.mode,
+                              (width * len(pil_img_list), height))
+    for i, img in enumerate(pil_img_list):
+        result_img.paste(img, box=(width * i, 0))
+
+    return result_img

+ 8 - 55
paddlers/rs_models/cd/losses/fccdn_loss.py

@@ -43,42 +43,13 @@ class DiceLoss(nn.Layer):
         return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true)
         return self.soft_dice_loss(y_pred.astype(paddle.float32), y_true)
 
 
 
 
-class MultiClassDiceLoss(nn.Layer):
-    def __init__(
-            self,
-            weight,
-            batch=True,
-            ignore_index=-1,
-            do_softmax=False,
-            **kwargs, ):
-        super(MultiClassDiceLoss, self).__init__()
-        self.ignore_index = ignore_index
-        self.weight = weight
-        self.do_softmax = do_softmax
-        self.binary_diceloss = DiceLoss(batch)
-
-    def forward(self, y_pred, y_true):
-        if self.do_softmax:
-            y_pred = paddle.nn.functional.softmax(y_pred, axis=1)
-        y_true = F.one_hot(y_true.long(), y_pred.shape[1]).permute(0, 3, 1, 2)
-        total_loss = 0.0
-        tmp_i = 0.0
-        for i in range(y_pred.shape[1]):
-            if i != self.ignore_index:
-                diceloss = self.binary_diceloss(y_pred[:, i, :, :],
-                                                y_true[:, i, :, :])
-                total_loss += paddle.multiply(diceloss, self.weight[i])
-                tmp_i += 1.0
-        return total_loss / tmp_i
-
-
 class DiceBCELoss(nn.Layer):
 class DiceBCELoss(nn.Layer):
     """Binary change detection task loss"""
     """Binary change detection task loss"""
 
 
     def __init__(self):
     def __init__(self):
         super(DiceBCELoss, self).__init__()
         super(DiceBCELoss, self).__init__()
         self.bce_loss = nn.BCELoss()
         self.bce_loss = nn.BCELoss()
-        self.binnary_dice = DiceLoss()
+        self.binary_dice = DiceLoss()
 
 
     def forward(self, scores, labels, do_sigmoid=True):
     def forward(self, scores, labels, do_sigmoid=True):
         if len(scores.shape) > 3:
         if len(scores.shape) > 3:
@@ -87,29 +58,11 @@ class DiceBCELoss(nn.Layer):
             labels = labels.squeeze(1)
             labels = labels.squeeze(1)
         if do_sigmoid:
         if do_sigmoid:
             scores = paddle.nn.functional.sigmoid(scores.clone())
             scores = paddle.nn.functional.sigmoid(scores.clone())
-        diceloss = self.binnary_dice(scores, labels)
+        diceloss = self.binary_dice(scores, labels)
         bceloss = self.bce_loss(scores, labels)
         bceloss = self.bce_loss(scores, labels)
         return diceloss + bceloss
         return diceloss + bceloss
 
 
 
 
-class McDiceBCELoss(nn.Layer):
-    """Multi-class change detection task loss"""
-
-    def __init__(self, weight, do_sigmoid=True):
-        super(McDiceBCELoss, self).__init__()
-        self.ce_loss = nn.CrossEntropyLoss(weight)
-        self.dice = MultiClassDiceLoss(weight, do_sigmoid)
-
-    def forward(self, scores, labels):
-        if len(scores.shape) < 4:
-            scores = scores.unsqueeze(1)
-        if len(labels.shape) < 4:
-            labels = labels.unsqueeze(1)
-        diceloss = self.dice(scores, labels)
-        bceloss = self.ce_loss(scores, labels)
-        return diceloss + bceloss
-
-
 def fccdn_ssl_loss(logits_list, labels):
 def fccdn_ssl_loss(logits_list, labels):
     """
     """
     Self-supervised learning loss for change detection.
     Self-supervised learning loss for change detection.
@@ -160,11 +113,11 @@ def fccdn_ssl_loss(logits_list, labels):
 
 
     # Seg loss
     # Seg loss
     labels_downsample = labels_downsample.astype(paddle.float32)
     labels_downsample = labels_downsample.astype(paddle.float32)
-    loss_aux = 0.2 * criterion_ssl(out1, pred_seg_post_tmp1, False)
-    loss_aux += 0.2 * criterion_ssl(out2, pred_seg_pre_tmp1, False)
-    loss_aux += 0.2 * criterion_ssl(
-        out3, labels_downsample - pred_seg_post_tmp2, False)
-    loss_aux += 0.2 * criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
-                                    False)
+    loss_aux = criterion_ssl(out1, pred_seg_post_tmp1, False)
+    loss_aux += criterion_ssl(out2, pred_seg_pre_tmp1, False)
+    loss_aux += criterion_ssl(out3, labels_downsample - pred_seg_post_tmp2,
+                              False)
+    loss_aux += criterion_ssl(out4, labels_downsample - pred_seg_pre_tmp2,
+                              False)
 
 
     return loss_aux
     return loss_aux

+ 1 - 1
paddlers/rs_models/clas/__init__.py

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-from .condensenet_v2 import CondenseNetV2_a, CondenseNetV2_b, CondenseNetV2_c
+from .condensenetv2 import CondenseNetV2_A, CondenseNetV2_B, CondenseNetV2_C

+ 442 - 442
paddlers/rs_models/clas/condensenet_v2.py → paddlers/rs_models/clas/condensenetv2.py

@@ -1,442 +1,442 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This code is based on https://github.com/AgentMaker/Paddle-Image-Models
-Ths copyright of AgentMaker/Paddle-Image-Models is as follows:
-Apache License [see LICENSE for details]
-"""
-
-import paddle
-import paddle.nn as nn
-
-__all__ = ["CondenseNetV2_a", "CondenseNetV2_b", "CondenseNetV2_c"]
-
-
-class SELayer(nn.Layer):
-    def __init__(self, inplanes, reduction=16):
-        super(SELayer, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2D(1)
-        self.fc = nn.Sequential(
-            nn.Linear(
-                inplanes, inplanes // reduction, bias_attr=False),
-            nn.ReLU(),
-            nn.Linear(
-                inplanes // reduction, inplanes, bias_attr=False),
-            nn.Sigmoid(), )
-
-    def forward(self, x):
-        b, c, _, _ = x.shape
-        y = self.avg_pool(x).reshape((b, c))
-        y = self.fc(y).reshape((b, c, 1, 1))
-        return x * paddle.expand(y, shape=x.shape)
-
-
-class HS(nn.Layer):
-    def __init__(self):
-        super(HS, self).__init__()
-        self.relu6 = nn.ReLU6()
-
-    def forward(self, inputs):
-        return inputs * self.relu6(inputs + 3) / 6
-
-
-class Conv(nn.Sequential):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            padding=0,
-            groups=1,
-            activation="ReLU",
-            bn_momentum=0.9, ):
-        super(Conv, self).__init__()
-        self.add_sublayer(
-            "norm", nn.BatchNorm2D(
-                in_channels, momentum=bn_momentum))
-        if activation == "ReLU":
-            self.add_sublayer("activation", nn.ReLU())
-        elif activation == "HS":
-            self.add_sublayer("activation", HS())
-        else:
-            raise NotImplementedError
-        self.add_sublayer(
-            "conv",
-            nn.Conv2D(
-                in_channels,
-                out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                padding=padding,
-                bias_attr=False,
-                groups=groups, ), )
-
-
-def ShuffleLayer(x, groups):
-    batchsize, num_channels, height, width = x.shape
-    channels_per_group = num_channels // groups
-    # Reshape
-    x = x.reshape((batchsize, groups, channels_per_group, height, width))
-    # Transpose
-    x = x.transpose((0, 2, 1, 3, 4))
-    # Reshape
-    x = x.reshape((batchsize, groups * channels_per_group, height, width))
-    return x
-
-
-def ShuffleLayerTrans(x, groups):
-    batchsize, num_channels, height, width = x.shape
-    channels_per_group = num_channels // groups
-    # Reshape
-    x = x.reshape((batchsize, channels_per_group, groups, height, width))
-    # Transpose
-    x = x.transpose((0, 2, 1, 3, 4))
-    # Reshape
-    x = x.reshape((batchsize, channels_per_group * groups, height, width))
-    return x
-
-
-class CondenseLGC(nn.Layer):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            padding=0,
-            groups=1,
-            activation="ReLU", ):
-        super(CondenseLGC, self).__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.groups = groups
-        self.norm = nn.BatchNorm2D(self.in_channels)
-        if activation == "ReLU":
-            self.activation = nn.ReLU()
-        elif activation == "HS":
-            self.activation = HS()
-        else:
-            raise NotImplementedError
-        self.conv = nn.Conv2D(
-            self.in_channels,
-            self.out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            groups=self.groups,
-            bias_attr=False, )
-        self.register_buffer(
-            "index", paddle.zeros(
-                (self.in_channels, ), dtype="int64"))
-
-    def forward(self, x):
-        x = paddle.index_select(x, self.index, axis=1)
-        x = self.norm(x)
-        x = self.activation(x)
-        x = self.conv(x)
-        x = ShuffleLayer(x, self.groups)
-        return x
-
-
-class CondenseSFR(nn.Layer):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            padding=0,
-            groups=1,
-            activation="ReLU", ):
-        super(CondenseSFR, self).__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.groups = groups
-        self.norm = nn.BatchNorm2D(self.in_channels)
-        if activation == "ReLU":
-            self.activation = nn.ReLU()
-        elif activation == "HS":
-            self.activation = HS()
-        else:
-            raise NotImplementedError
-        self.conv = nn.Conv2D(
-            self.in_channels,
-            self.out_channels,
-            kernel_size=kernel_size,
-            padding=padding,
-            groups=self.groups,
-            bias_attr=False,
-            stride=stride, )
-        self.register_buffer("index",
-                             paddle.zeros(
-                                 (self.out_channels, self.out_channels)))
-
-    def forward(self, x):
-        x = self.norm(x)
-        x = self.activation(x)
-        x = ShuffleLayerTrans(x, self.groups)
-        x = self.conv(x)  # SIZE: N, C, H, W
-        N, C, H, W = x.shape
-        x = x.reshape((N, C, H * W))
-        x = x.transpose((0, 2, 1))  # SIZE: N, HW, C
-        # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
-        x = paddle.matmul(x, self.index)
-        x = x.transpose((0, 2, 1))  # SIZE: N, C, HW
-        x = x.reshape((N, C, H, W))  # SIZE: N, C, HW
-        return x
-
-
-class _SFR_DenseLayer(nn.Layer):
-    def __init__(
-            self,
-            in_channels,
-            growth_rate,
-            group_1x1,
-            group_3x3,
-            group_trans,
-            bottleneck,
-            activation,
-            use_se=False, ):
-        super(_SFR_DenseLayer, self).__init__()
-        self.group_1x1 = group_1x1
-        self.group_3x3 = group_3x3
-        self.group_trans = group_trans
-        self.use_se = use_se
-        # 1x1 conv i --> b*k
-        self.conv_1 = CondenseLGC(
-            in_channels,
-            bottleneck * growth_rate,
-            kernel_size=1,
-            groups=self.group_1x1,
-            activation=activation, )
-        # 3x3 conv b*k --> k
-        self.conv_2 = Conv(
-            bottleneck * growth_rate,
-            growth_rate,
-            kernel_size=3,
-            padding=1,
-            groups=self.group_3x3,
-            activation=activation, )
-        # 1x1 res conv k(8-16-32)--> i (k*l)
-        self.sfr = CondenseSFR(
-            growth_rate,
-            in_channels,
-            kernel_size=1,
-            groups=self.group_trans,
-            activation=activation, )
-        if self.use_se:
-            self.se = SELayer(inplanes=growth_rate, reduction=1)
-
-    def forward(self, x):
-        x_ = x
-        x = self.conv_1(x)
-        x = self.conv_2(x)
-        if self.use_se:
-            x = self.se(x)
-        sfr_feature = self.sfr(x)
-        y = x_ + sfr_feature
-        return paddle.concat([y, x], 1)
-
-
-class _SFR_DenseBlock(nn.Sequential):
-    def __init__(
-            self,
-            num_layers,
-            in_channels,
-            growth_rate,
-            group_1x1,
-            group_3x3,
-            group_trans,
-            bottleneck,
-            activation,
-            use_se, ):
-        super(_SFR_DenseBlock, self).__init__()
-        for i in range(num_layers):
-            layer = _SFR_DenseLayer(
-                in_channels + i * growth_rate,
-                growth_rate,
-                group_1x1,
-                group_3x3,
-                group_trans,
-                bottleneck,
-                activation,
-                use_se, )
-            self.add_sublayer("denselayer_%d" % (i + 1), layer)
-
-
-class _Transition(nn.Layer):
-    def __init__(self):
-        super(_Transition, self).__init__()
-        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)
-
-    def forward(self, x):
-        x = self.pool(x)
-        return x
-
-
-class CondenseNetV2(nn.Layer):
-    def __init__(
-            self,
-            stages,
-            growth,
-            HS_start_block,
-            SE_start_block,
-            fc_channel,
-            group_1x1,
-            group_3x3,
-            group_trans,
-            bottleneck,
-            last_se_reduction,
-            in_channels=3,
-            class_num=1000, ):
-        super(CondenseNetV2, self).__init__()
-        self.stages = stages
-        self.growth = growth
-        self.in_channels = in_channels
-        self.class_num = class_num
-        self.last_se_reduction = last_se_reduction
-        assert len(self.stages) == len(self.growth)
-        self.progress = 0.0
-
-        self.init_stride = 2
-        self.pool_size = 7
-
-        self.features = nn.Sequential()
-        # Initial nChannels should be 3
-        self.num_features = 2 * self.growth[0]
-        # Dense-block 1 (224x224)
-        self.features.add_sublayer(
-            "init_conv",
-            nn.Conv2D(
-                in_channels,
-                self.num_features,
-                kernel_size=3,
-                stride=self.init_stride,
-                padding=1,
-                bias_attr=False, ), )
-        for i in range(len(self.stages)):
-            activation = "HS" if i >= HS_start_block else "ReLU"
-            use_se = True if i >= SE_start_block else False
-            # Dense-block i
-            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
-                           activation, use_se)
-
-        self.fc = nn.Linear(self.num_features, fc_channel)
-        self.fc_act = HS()
-
-        # Classifier layer
-        if class_num > 0:
-            self.classifier = nn.Linear(fc_channel, class_num)
-        self._initialize()
-
-    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
-                  activation, use_se):
-        # Check if ith is the last one
-        last = i == len(self.stages) - 1
-        block = _SFR_DenseBlock(
-            num_layers=self.stages[i],
-            in_channels=self.num_features,
-            growth_rate=self.growth[i],
-            group_1x1=group_1x1,
-            group_3x3=group_3x3,
-            group_trans=group_trans,
-            bottleneck=bottleneck,
-            activation=activation,
-            use_se=use_se, )
-        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
-        self.num_features += self.stages[i] * self.growth[i]
-        if not last:
-            trans = _Transition()
-            self.features.add_sublayer("transition_%d" % (i + 1), trans)
-        else:
-            self.features.add_sublayer("norm_last",
-                                       nn.BatchNorm2D(self.num_features))
-            self.features.add_sublayer("relu_last", nn.ReLU())
-            self.features.add_sublayer("pool_last",
-                                       nn.AvgPool2D(self.pool_size))
-            # if useSE:
-            self.features.add_sublayer(
-                "se_last",
-                SELayer(
-                    self.num_features, reduction=self.last_se_reduction))
-
-    def forward(self, x):
-        features = self.features(x)
-        out = features.reshape((features.shape[0], features.shape[1] *
-                                features.shape[2] * features.shape[3]))
-        out = self.fc(out)
-        out = self.fc_act(out)
-
-        if self.class_num > 0:
-            out = self.classifier(out)
-
-        return out
-
-    def _initialize(self):
-        # Initialize
-        for m in self.sublayers():
-            if isinstance(m, nn.Conv2D):
-                nn.initializer.KaimingNormal()(m.weight)
-            elif isinstance(m, nn.BatchNorm2D):
-                nn.initializer.Constant(value=1.0)(m.weight)
-                nn.initializer.Constant(value=0.0)(m.bias)
-
-
-def CondenseNetV2_a(**kwargs):
-    model = CondenseNetV2(
-        stages=[1, 1, 4, 6, 8],
-        growth=[8, 8, 16, 32, 64],
-        HS_start_block=2,
-        SE_start_block=3,
-        fc_channel=828,
-        group_1x1=8,
-        group_3x3=8,
-        group_trans=8,
-        bottleneck=4,
-        last_se_reduction=16,
-        **kwargs)
-    return model
-
-
-def CondenseNetV2_b(**kwargs):
-    model = CondenseNetV2(
-        stages=[2, 4, 6, 8, 6],
-        growth=[6, 12, 24, 48, 96],
-        HS_start_block=2,
-        SE_start_block=3,
-        fc_channel=1024,
-        group_1x1=6,
-        group_3x3=6,
-        group_trans=6,
-        bottleneck=4,
-        last_se_reduction=16,
-        **kwargs)
-    return model
-
-
-def CondenseNetV2_c(**kwargs):
-    model = CondenseNetV2(
-        stages=[4, 6, 8, 10, 8],
-        growth=[8, 16, 32, 64, 128],
-        HS_start_block=2,
-        SE_start_block=3,
-        fc_channel=1024,
-        group_1x1=8,
-        group_3x3=8,
-        group_trans=8,
-        bottleneck=4,
-        last_se_reduction=16,
-        **kwargs)
-    return model
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on https://github.com/AgentMaker/Paddle-Image-Models
+The copyright of AgentMaker/Paddle-Image-Models is as follows:
+Apache License [see LICENSE for details]
+"""
+
+import paddle
+import paddle.nn as nn
+
+__all__ = ["CondenseNetV2_A", "CondenseNetV2_B", "CondenseNetV2_C"]
+
+
+class SELayer(nn.Layer):
+    def __init__(self, inplanes, reduction=16):
+        super(SELayer, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
+        self.fc = nn.Sequential(
+            nn.Linear(
+                inplanes, inplanes // reduction, bias_attr=False),
+            nn.ReLU(),
+            nn.Linear(
+                inplanes // reduction, inplanes, bias_attr=False),
+            nn.Sigmoid(), )
+
+    def forward(self, x):
+        b, c, _, _ = x.shape
+        y = self.avg_pool(x).reshape((b, c))
+        y = self.fc(y).reshape((b, c, 1, 1))
+        return x * paddle.expand(y, shape=x.shape)
+
+
+class HS(nn.Layer):
+    def __init__(self):
+        super(HS, self).__init__()
+        self.relu6 = nn.ReLU6()
+
+    def forward(self, inputs):
+        return inputs * self.relu6(inputs + 3) / 6
+
+
+class Conv(nn.Sequential):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=1,
+            padding=0,
+            groups=1,
+            activation="ReLU",
+            bn_momentum=0.9, ):
+        super(Conv, self).__init__()
+        self.add_sublayer(
+            "norm", nn.BatchNorm2D(
+                in_channels, momentum=bn_momentum))
+        if activation == "ReLU":
+            self.add_sublayer("activation", nn.ReLU())
+        elif activation == "HS":
+            self.add_sublayer("activation", HS())
+        else:
+            raise NotImplementedError
+        self.add_sublayer(
+            "conv",
+            nn.Conv2D(
+                in_channels,
+                out_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                bias_attr=False,
+                groups=groups, ), )
+
+
+def ShuffleLayer(x, groups):
+    batchsize, num_channels, height, width = x.shape
+    channels_per_group = num_channels // groups
+    # Reshape
+    x = x.reshape((batchsize, groups, channels_per_group, height, width))
+    # Transpose
+    x = x.transpose((0, 2, 1, 3, 4))
+    # Reshape
+    x = x.reshape((batchsize, groups * channels_per_group, height, width))
+    return x
+
+
+def ShuffleLayerTrans(x, groups):
+    batchsize, num_channels, height, width = x.shape
+    channels_per_group = num_channels // groups
+    # Reshape
+    x = x.reshape((batchsize, channels_per_group, groups, height, width))
+    # Transpose
+    x = x.transpose((0, 2, 1, 3, 4))
+    # Reshape
+    x = x.reshape((batchsize, channels_per_group * groups, height, width))
+    return x
+
+
+class CondenseLGC(nn.Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=1,
+            padding=0,
+            groups=1,
+            activation="ReLU", ):
+        super(CondenseLGC, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.groups = groups
+        self.norm = nn.BatchNorm2D(self.in_channels)
+        if activation == "ReLU":
+            self.activation = nn.ReLU()
+        elif activation == "HS":
+            self.activation = HS()
+        else:
+            raise NotImplementedError
+        self.conv = nn.Conv2D(
+            self.in_channels,
+            self.out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=self.groups,
+            bias_attr=False, )
+        self.register_buffer(
+            "index", paddle.zeros(
+                (self.in_channels, ), dtype="int64"))
+
+    def forward(self, x):
+        x = paddle.index_select(x, self.index, axis=1)
+        x = self.norm(x)
+        x = self.activation(x)
+        x = self.conv(x)
+        x = ShuffleLayer(x, self.groups)
+        return x
+
+
+class CondenseSFR(nn.Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=1,
+            padding=0,
+            groups=1,
+            activation="ReLU", ):
+        super(CondenseSFR, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.groups = groups
+        self.norm = nn.BatchNorm2D(self.in_channels)
+        if activation == "ReLU":
+            self.activation = nn.ReLU()
+        elif activation == "HS":
+            self.activation = HS()
+        else:
+            raise NotImplementedError
+        self.conv = nn.Conv2D(
+            self.in_channels,
+            self.out_channels,
+            kernel_size=kernel_size,
+            padding=padding,
+            groups=self.groups,
+            bias_attr=False,
+            stride=stride, )
+        self.register_buffer("index",
+                             paddle.zeros(
+                                 (self.out_channels, self.out_channels)))
+
+    def forward(self, x):
+        x = self.norm(x)
+        x = self.activation(x)
+        x = ShuffleLayerTrans(x, self.groups)
+        x = self.conv(x)  # SIZE: N, C, H, W
+        N, C, H, W = x.shape
+        x = x.reshape((N, C, H * W))
+        x = x.transpose((0, 2, 1))  # SIZE: N, HW, C
+        # x SIZE: N, HW, C; self.index SIZE: C, C; OUTPUT SIZE: N, HW, C
+        x = paddle.matmul(x, self.index)
+        x = x.transpose((0, 2, 1))  # SIZE: N, C, HW
+        x = x.reshape((N, C, H, W))  # SIZE: N, C, H, W
+        return x
+
+
+class _SFR_DenseLayer(nn.Layer):
+    def __init__(
+            self,
+            in_channels,
+            growth_rate,
+            group_1x1,
+            group_3x3,
+            group_trans,
+            bottleneck,
+            activation,
+            use_se=False, ):
+        super(_SFR_DenseLayer, self).__init__()
+        self.group_1x1 = group_1x1
+        self.group_3x3 = group_3x3
+        self.group_trans = group_trans
+        self.use_se = use_se
+        # 1x1 conv i --> b*k
+        self.conv_1 = CondenseLGC(
+            in_channels,
+            bottleneck * growth_rate,
+            kernel_size=1,
+            groups=self.group_1x1,
+            activation=activation, )
+        # 3x3 conv b*k --> k
+        self.conv_2 = Conv(
+            bottleneck * growth_rate,
+            growth_rate,
+            kernel_size=3,
+            padding=1,
+            groups=self.group_3x3,
+            activation=activation, )
+        # 1x1 res conv k(8-16-32)--> i (k*l)
+        self.sfr = CondenseSFR(
+            growth_rate,
+            in_channels,
+            kernel_size=1,
+            groups=self.group_trans,
+            activation=activation, )
+        if self.use_se:
+            self.se = SELayer(inplanes=growth_rate, reduction=1)
+
+    def forward(self, x):
+        x_ = x
+        x = self.conv_1(x)
+        x = self.conv_2(x)
+        if self.use_se:
+            x = self.se(x)
+        sfr_feature = self.sfr(x)
+        y = x_ + sfr_feature
+        return paddle.concat([y, x], 1)
+
+
+class _SFR_DenseBlock(nn.Sequential):
+    def __init__(
+            self,
+            num_layers,
+            in_channels,
+            growth_rate,
+            group_1x1,
+            group_3x3,
+            group_trans,
+            bottleneck,
+            activation,
+            use_se, ):
+        super(_SFR_DenseBlock, self).__init__()
+        for i in range(num_layers):
+            layer = _SFR_DenseLayer(
+                in_channels + i * growth_rate,
+                growth_rate,
+                group_1x1,
+                group_3x3,
+                group_trans,
+                bottleneck,
+                activation,
+                use_se, )
+            self.add_sublayer("denselayer_%d" % (i + 1), layer)
+
+
+class _Transition(nn.Layer):
+    def __init__(self):
+        super(_Transition, self).__init__()
+        self.pool = nn.AvgPool2D(kernel_size=2, stride=2)
+
+    def forward(self, x):
+        x = self.pool(x)
+        return x
+
+
+class CondenseNetV2(nn.Layer):
+    def __init__(
+            self,
+            stages,
+            growth,
+            HS_start_block,
+            SE_start_block,
+            fc_channel,
+            group_1x1,
+            group_3x3,
+            group_trans,
+            bottleneck,
+            last_se_reduction,
+            in_channels=3,
+            class_num=1000, ):
+        super(CondenseNetV2, self).__init__()
+        self.stages = stages
+        self.growth = growth
+        self.in_channels = in_channels
+        self.class_num = class_num
+        self.last_se_reduction = last_se_reduction
+        assert len(self.stages) == len(self.growth)
+        self.progress = 0.0
+
+        self.init_stride = 2
+        self.pool_size = 7
+
+        self.features = nn.Sequential()
+        # Initial nChannels should be 3
+        self.num_features = 2 * self.growth[0]
+        # Dense-block 1 (224x224)
+        self.features.add_sublayer(
+            "init_conv",
+            nn.Conv2D(
+                in_channels,
+                self.num_features,
+                kernel_size=3,
+                stride=self.init_stride,
+                padding=1,
+                bias_attr=False, ), )
+        for i in range(len(self.stages)):
+            activation = "HS" if i >= HS_start_block else "ReLU"
+            use_se = True if i >= SE_start_block else False
+            # Dense-block i
+            self.add_block(i, group_1x1, group_3x3, group_trans, bottleneck,
+                           activation, use_se)
+
+        self.fc = nn.Linear(self.num_features, fc_channel)
+        self.fc_act = HS()
+
+        # Classifier layer
+        if class_num > 0:
+            self.classifier = nn.Linear(fc_channel, class_num)
+        self._initialize()
+
+    def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
+                  activation, use_se):
+        # Check if ith is the last one
+        last = i == len(self.stages) - 1
+        block = _SFR_DenseBlock(
+            num_layers=self.stages[i],
+            in_channels=self.num_features,
+            growth_rate=self.growth[i],
+            group_1x1=group_1x1,
+            group_3x3=group_3x3,
+            group_trans=group_trans,
+            bottleneck=bottleneck,
+            activation=activation,
+            use_se=use_se, )
+        self.features.add_sublayer("denseblock_%d" % (i + 1), block)
+        self.num_features += self.stages[i] * self.growth[i]
+        if not last:
+            trans = _Transition()
+            self.features.add_sublayer("transition_%d" % (i + 1), trans)
+        else:
+            self.features.add_sublayer("norm_last",
+                                       nn.BatchNorm2D(self.num_features))
+            self.features.add_sublayer("relu_last", nn.ReLU())
+            self.features.add_sublayer("pool_last",
+                                       nn.AvgPool2D(self.pool_size))
+            # if useSE:
+            self.features.add_sublayer(
+                "se_last",
+                SELayer(
+                    self.num_features, reduction=self.last_se_reduction))
+
+    def forward(self, x):
+        features = self.features(x)
+        out = features.reshape((features.shape[0], features.shape[1] *
+                                features.shape[2] * features.shape[3]))
+        out = self.fc(out)
+        out = self.fc_act(out)
+
+        if self.class_num > 0:
+            out = self.classifier(out)
+
+        return out
+
+    def _initialize(self):
+        # Initialize
+        for m in self.sublayers():
+            if isinstance(m, nn.Conv2D):
+                nn.initializer.KaimingNormal()(m.weight)
+            elif isinstance(m, nn.BatchNorm2D):
+                nn.initializer.Constant(value=1.0)(m.weight)
+                nn.initializer.Constant(value=0.0)(m.bias)
+
+
+def CondenseNetV2_A(**kwargs):
+    model = CondenseNetV2(
+        stages=[1, 1, 4, 6, 8],
+        growth=[8, 8, 16, 32, 64],
+        HS_start_block=2,
+        SE_start_block=3,
+        fc_channel=828,
+        group_1x1=8,
+        group_3x3=8,
+        group_trans=8,
+        bottleneck=4,
+        last_se_reduction=16,
+        **kwargs)
+    return model
+
+
+def CondenseNetV2_B(**kwargs):
+    model = CondenseNetV2(
+        stages=[2, 4, 6, 8, 6],
+        growth=[6, 12, 24, 48, 96],
+        HS_start_block=2,
+        SE_start_block=3,
+        fc_channel=1024,
+        group_1x1=6,
+        group_3x3=6,
+        group_trans=6,
+        bottleneck=4,
+        last_se_reduction=16,
+        **kwargs)
+    return model
+
+
+def CondenseNetV2_C(**kwargs):
+    model = CondenseNetV2(
+        stages=[4, 6, 8, 10, 8],
+        growth=[8, 16, 32, 64, 128],
+        HS_start_block=2,
+        SE_start_block=3,
+        fc_channel=1024,
+        group_1x1=8,
+        group_3x3=8,
+        group_trans=8,
+        bottleneck=4,
+        last_se_reduction=16,
+        **kwargs)
+    return model

+ 1 - 1
paddlers/tasks/change_detector.py

@@ -1067,7 +1067,7 @@ class FCCDN(BaseChangeDetector):
             return {
             return {
                 'types':
                 'types':
                 [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss],
                 [seg_losses.CrossEntropyLoss(), cmcd.losses.fccdn_ssl_loss],
-                'coef': [1.0, 1.0]
+                'coef': [1.0, 0.2]
             }
             }
         else:
         else:
             raise ValueError(
             raise ValueError(

+ 14 - 10
paddlers/tasks/classifier.py

@@ -34,9 +34,7 @@ from paddlers.utils.checkpoint import cls_pretrain_weights_dict
 from paddlers.transforms import Resize, decode_image
 from paddlers.transforms import Resize, decode_image
 from .base import BaseModel
 from .base import BaseModel
 
 
-__all__ = [
-    "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
-]
+__all__ = ["ResNet50_vd", "MobileNetV3", "HRNet", "CondenseNetV2"]
 
 
 
 
 class BaseClassifier(BaseModel):
 class BaseClassifier(BaseModel):
@@ -600,13 +598,13 @@ class ResNet50_vd(BaseClassifier):
             **params)
             **params)
 
 
 
 
-class MobileNetV3_small_x1_0(BaseClassifier):
+class MobileNetV3(BaseClassifier):
     def __init__(self,
     def __init__(self,
                  num_classes=2,
                  num_classes=2,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
                  losses=None,
                  losses=None,
                  **params):
                  **params):
-        super(MobileNetV3_small_x1_0, self).__init__(
+        super(MobileNetV3, self).__init__(
             model_name='MobileNetV3_small_x1_0',
             model_name='MobileNetV3_small_x1_0',
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
@@ -614,13 +612,13 @@ class MobileNetV3_small_x1_0(BaseClassifier):
             **params)
             **params)
 
 
 
 
-class HRNet_W18_C(BaseClassifier):
+class HRNet(BaseClassifier):
     def __init__(self,
     def __init__(self,
                  num_classes=2,
                  num_classes=2,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
                  losses=None,
                  losses=None,
                  **params):
                  **params):
-        super(HRNet_W18_C, self).__init__(
+        super(HRNet, self).__init__(
             model_name='HRNet_W18_C',
             model_name='HRNet_W18_C',
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
@@ -628,15 +626,21 @@ class HRNet_W18_C(BaseClassifier):
             **params)
             **params)
 
 
 
 
-class CondenseNetV2_b(BaseClassifier):
+class CondenseNetV2(BaseClassifier):
     def __init__(self,
     def __init__(self,
                  num_classes=2,
                  num_classes=2,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
                  losses=None,
                  losses=None,
+                 in_channels=3,
+                 arch='A',
                  **params):
                  **params):
-        super(CondenseNetV2_b, self).__init__(
-            model_name='CondenseNetV2_b',
+        if arch not in ('A', 'B', 'C'):
+            raise ValueError("{} is not a supported architecture.".format(arch))
+        model_name = 'CondenseNetV2_' + arch
+        super(CondenseNetV2, self).__init__(
+            model_name=model_name,
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             losses=losses,
+            in_channels=in_channels,
             **params)
             **params)

+ 1 - 1
paddlers/tasks/restorer.py

@@ -773,7 +773,7 @@ class LESRCNN(BaseRestorer):
                  group=1,
                  group=1,
                  **params):
                  **params):
         params.update({
         params.update({
-            'scale': sr_factor,
+            'scale': sr_factor if sr_factor is not None else 1,
             'multi_scale': multi_scale,
             'multi_scale': multi_scale,
             'group': group
             'group': group
         })
         })

+ 21 - 11
paddlers/tasks/segmenter.py

@@ -185,14 +185,7 @@ class BaseSegmenter(BaseModel):
                 )
                 )
             losses = [getattr(seg_losses, loss)() for loss in losses]
             losses = [getattr(seg_losses, loss)() for loss in losses]
             loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))]
             loss_type = [seg_losses.MixedLoss(losses=losses, coef=list(coef))]
-        if self.model_name == 'FastSCNN':
-            loss_type *= 2
-            loss_coef = [1.0, 0.4]
-        elif self.model_name == 'BiSeNetV2':
-            loss_type *= 5
-            loss_coef = [1.0] * 5
-        else:
-            loss_coef = [1.0]
+        loss_coef = [1.0]
         losses = {'types': loss_type, 'coef': loss_coef}
         losses = {'types': loss_type, 'coef': loss_coef}
         return losses
         return losses
 
 
@@ -761,7 +754,7 @@ class UNet(BaseSegmenter):
         })
         })
         super(UNet, self).__init__(
         super(UNet, self).__init__(
             model_name='UNet',
             model_name='UNet',
-            input_channel=in_channels,
+            in_channels=in_channels,
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             losses=losses,
@@ -789,7 +782,7 @@ class DeepLabV3P(BaseSegmenter):
         if params.get('with_net', True):
         if params.get('with_net', True):
             with DisablePrint():
             with DisablePrint():
                 backbone = getattr(ppseg.models, backbone)(
                 backbone = getattr(ppseg.models, backbone)(
-                    input_channel=in_channels, output_stride=output_stride)
+                    in_channels=in_channels, output_stride=output_stride)
         else:
         else:
             backbone = None
             backbone = None
         params.update({
         params.update({
@@ -809,6 +802,7 @@ class DeepLabV3P(BaseSegmenter):
 
 
 class FastSCNN(BaseSegmenter):
 class FastSCNN(BaseSegmenter):
     def __init__(self,
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  num_classes=2,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
                  losses=None,
                  losses=None,
@@ -817,14 +811,22 @@ class FastSCNN(BaseSegmenter):
         params.update({'align_corners': align_corners})
         params.update({'align_corners': align_corners})
         super(FastSCNN, self).__init__(
         super(FastSCNN, self).__init__(
             model_name='FastSCNN',
             model_name='FastSCNN',
+            in_channels=in_channels,
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             losses=losses,
             **params)
             **params)
 
 
+    def default_loss(self):
+        losses = super(FastSCNN, self).default_loss()
+        losses['types'] *= 2
+        losses['coef'] = [1.0, 0.4]
+        return losses
+
 
 
 class HRNet(BaseSegmenter):
 class HRNet(BaseSegmenter):
     def __init__(self,
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  num_classes=2,
                  width=48,
                  width=48,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
@@ -839,7 +841,7 @@ class HRNet(BaseSegmenter):
         if params.get('with_net', True):
         if params.get('with_net', True):
             with DisablePrint():
             with DisablePrint():
                 backbone = getattr(ppseg.models, self.backbone_name)(
                 backbone = getattr(ppseg.models, self.backbone_name)(
-                    align_corners=align_corners)
+                    in_channels=in_channels, align_corners=align_corners)
         else:
         else:
             backbone = None
             backbone = None
 
 
@@ -855,6 +857,7 @@ class HRNet(BaseSegmenter):
 
 
 class BiSeNetV2(BaseSegmenter):
 class BiSeNetV2(BaseSegmenter):
     def __init__(self,
     def __init__(self,
+                 in_channels=3,
                  num_classes=2,
                  num_classes=2,
                  use_mixed_loss=False,
                  use_mixed_loss=False,
                  losses=None,
                  losses=None,
@@ -863,11 +866,18 @@ class BiSeNetV2(BaseSegmenter):
         params.update({'align_corners': align_corners})
         params.update({'align_corners': align_corners})
         super(BiSeNetV2, self).__init__(
         super(BiSeNetV2, self).__init__(
             model_name='BiSeNetV2',
             model_name='BiSeNetV2',
+            in_channels=in_channels,
             num_classes=num_classes,
             num_classes=num_classes,
             use_mixed_loss=use_mixed_loss,
             use_mixed_loss=use_mixed_loss,
             losses=losses,
             losses=losses,
             **params)
             **params)
 
 
+    def default_loss(self):
+        losses = super(BiSeNetV2, self).default_loss()
+        losses['types'] *= 5
+        losses['coef'] = [1.0] * 5
+        return losses
+
 
 
 class FarSeg(BaseSegmenter):
 class FarSeg(BaseSegmenter):
     def __init__(self,
     def __init__(self,

+ 5 - 4
paddlers/utils/checkpoint.py

@@ -493,11 +493,12 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             num_params_loaded = 0
             num_params_loaded = 0
             for k in model_state_dict:
             for k in model_state_dict:
                 if k not in param_state_dict:
                 if k not in param_state_dict:
-                    logging.warning("{} is not in pretrained model".format(k))
+                    logging.warning("{} is not in the pretrained model.".format(
+                        k))
                 elif list(param_state_dict[k].shape) != list(model_state_dict[k]
                 elif list(param_state_dict[k].shape) != list(model_state_dict[k]
                                                              .shape):
                                                              .shape):
                     logging.warning(
                     logging.warning(
-                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
+                        "[SKIP] Shape of parameters {} do not match. (pretrained: {} vs actual: {})"
                         .format(k, param_state_dict[k].shape, model_state_dict[
                         .format(k, param_state_dict[k].shape, model_state_dict[
                             k].shape))
                             k].shape))
                 else:
                 else:
@@ -507,11 +508,11 @@ def load_pretrain_weights(model, pretrain_weights=None, model_name=None):
             logging.info("There are {}/{} variables loaded into {}.".format(
             logging.info("There are {}/{} variables loaded into {}.".format(
                 num_params_loaded, len(model_state_dict), model_name))
                 num_params_loaded, len(model_state_dict), model_name))
         else:
         else:
-            raise ValueError('The pretrained model directory is not Found: {}'.
+            raise ValueError('The pretrained model directory is not found: {}'.
                              format(pretrain_weights))
                              format(pretrain_weights))
     else:
     else:
         logging.info(
         logging.info(
-            'No pretrained model to load, {} will be trained from scratch.'.
+            'No pretrained model to load. {} will be trained from scratch.'.
             format(model_name))
             format(model_name))
 
 
 
 

+ 4 - 0
test_tipc/README.md

@@ -32,6 +32,7 @@
 | 变化检测 | FC-Siam-conc | 支持 | - | - | - |
 | 变化检测 | FC-Siam-conc | 支持 | - | - | - |
 | 变化检测 | FC-Siam-diff | 支持 | - | - | - |
 | 变化检测 | FC-Siam-diff | 支持 | - | - | - |
 | 变化检测 | ChangeFormer | 支持 | - | - | - |
 | 变化检测 | ChangeFormer | 支持 | - | - | - |
+| 场景分类 | CondenseNet V2 | 支持 | - | - | - |
 | 场景分类 | HRNet | 支持 | - | - | - |
 | 场景分类 | HRNet | 支持 | - | - | - |
 | 场景分类 | MobileNetV3 | 支持 | - | - | - |
 | 场景分类 | MobileNetV3 | 支持 | - | - | - |
 | 场景分类 | ResNet50-vd | 支持 | - | - | - |
 | 场景分类 | ResNet50-vd | 支持 | - | - | - |
@@ -43,8 +44,11 @@
 | 目标检测 | PP-YOLO Tiny | 支持 | - | - | - |
 | 目标检测 | PP-YOLO Tiny | 支持 | - | - | - |
 | 目标检测 | PP-YOLOv2 | 支持 | - | - | - |
 | 目标检测 | PP-YOLOv2 | 支持 | - | - | - |
 | 目标检测 | YOLOv3 | 支持 | - | - | - |
 | 目标检测 | YOLOv3 | 支持 | - | - | - |
+| 图像分割 | BiSeNet V2 | 支持 | - | - | - |
 | 图像分割 | DeepLab V3+ | 支持 | - | - | - |
 | 图像分割 | DeepLab V3+ | 支持 | - | - | - |
 | 图像分割 | FarSeg | 支持 | - | - | - |
 | 图像分割 | FarSeg | 支持 | - | - | - |
+| 图像分割 | Fast-SCNN | 支持 | - | - | - |
+| 图像分割 | HRNet | 支持 | - | - | - |
 | 图像分割 | UNet | 支持 | - | - | - |
 | 图像分割 | UNet | 支持 | - | - | - |
 
 
 ## 3 测试工具简介
 ## 3 测试工具简介

+ 1 - 0
test_tipc/config_utils.py

@@ -119,6 +119,7 @@ def parse_args(*args, **kwargs):
     # Global settings
     # Global settings
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('cmd', choices=['train', 'eval'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
     parser.add_argument('task', choices=['cd', 'clas', 'det', 'res', 'seg'])
+    parser.add_argument('--seed', type=int, default=None)
 
 
     # Data
     # Data
     parser.add_argument('--datasets', type=dict, default={})
     parser.add_argument('--datasets', type=dict, default={})

+ 2 - 0
test_tipc/configs/cd/_base_/airchange.yaml

@@ -1,5 +1,7 @@
 # Basic configurations of AirChange dataset
 # Basic configurations of AirChange dataset
 
 
+seed: 1024
+
 datasets:
 datasets:
     train: !Node
     train: !Node
         type: CDDataset
         type: CDDataset

+ 2 - 0
test_tipc/configs/cd/_base_/levircd.yaml

@@ -1,5 +1,7 @@
 # Basic configurations of LEVIR-CD dataset
 # Basic configurations of LEVIR-CD dataset
 
 
+seed: 1024
+
 datasets:
 datasets:
     train: !Node
     train: !Node
         type: CDDataset
         type: CDDataset

+ 0 - 8
test_tipc/configs/cd/bit/bit.yaml

@@ -1,8 +0,0 @@
-# Basic configurations of BIT
-
-_base_: ../_base_/airchange.yaml
-
-save_dir: ./test_tipc/output/cd/bit/
-
-model: !Node
-    type: BIT

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.