Bobholamovic vor 3 Jahren
Ursprung
Commit
5834df2fad
100 geänderte Dateien mit 5806 neuen und 907 gelöschten Zeilen
  1. 1 1
      paddlers/models/ppseg/__init__.py
  2. 15 92
      paddlers/models/ppseg/core/infer.py
  3. 17 19
      paddlers/models/ppseg/core/predict.py
  4. 43 35
      paddlers/models/ppseg/core/train.py
  5. 73 33
      paddlers/models/ppseg/core/val.py
  6. 2 2
      paddlers/models/ppseg/cvlibs/callbacks.py
  7. 179 32
      paddlers/models/ppseg/cvlibs/config.py
  8. 2 2
      paddlers/models/ppseg/cvlibs/manager.py
  9. 30 4
      paddlers/models/ppseg/cvlibs/param_init.py
  10. 1 0
      paddlers/models/ppseg/datasets/__init__.py
  11. 22 14
      paddlers/models/ppseg/datasets/ade.py
  12. 5 5
      paddlers/models/ppseg/datasets/chase_db1.py
  13. 3 3
      paddlers/models/ppseg/datasets/cityscapes.py
  14. 3 3
      paddlers/models/ppseg/datasets/cocostuff.py
  15. 35 27
      paddlers/models/ppseg/datasets/dataset.py
  16. 5 5
      paddlers/models/ppseg/datasets/drive.py
  17. 6 6
      paddlers/models/ppseg/datasets/eg1800.py
  18. 5 5
      paddlers/models/ppseg/datasets/hrf.py
  19. 4 4
      paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py
  20. 4 4
      paddlers/models/ppseg/datasets/optic_disc_seg.py
  21. 3 3
      paddlers/models/ppseg/datasets/pascal_context.py
  22. 2 2
      paddlers/models/ppseg/datasets/pp_humanseg14k.py
  23. 135 0
      paddlers/models/ppseg/datasets/pssl.py
  24. 5 5
      paddlers/models/ppseg/datasets/stare.py
  25. 6 6
      paddlers/models/ppseg/datasets/supervisely.py
  26. 5 5
      paddlers/models/ppseg/datasets/voc.py
  27. 9 0
      paddlers/models/ppseg/models/__init__.py
  28. 3 3
      paddlers/models/ppseg/models/ann.py
  29. 6 6
      paddlers/models/ppseg/models/attention_unet.py
  30. 4 0
      paddlers/models/ppseg/models/backbones/__init__.py
  31. 318 0
      paddlers/models/ppseg/models/backbones/ghostnet.py
  32. 6 4
      paddlers/models/ppseg/models/backbones/hrnet.py
  33. 974 0
      paddlers/models/ppseg/models/backbones/lite_hrnet.py
  34. 6 6
      paddlers/models/ppseg/models/backbones/mix_transformer.py
  35. 217 116
      paddlers/models/ppseg/models/backbones/mobilenetv2.py
  36. 317 183
      paddlers/models/ppseg/models/backbones/mobilenetv3.py
  37. 7 6
      paddlers/models/ppseg/models/backbones/resnet_vd.py
  38. 315 0
      paddlers/models/ppseg/models/backbones/shufflenetv2.py
  39. 120 66
      paddlers/models/ppseg/models/backbones/stdcnet.py
  40. 7 7
      paddlers/models/ppseg/models/backbones/swin_transformer.py
  41. 716 0
      paddlers/models/ppseg/models/backbones/top_transformer.py
  42. 2 2
      paddlers/models/ppseg/models/backbones/transformer_utils.py
  43. 6 6
      paddlers/models/ppseg/models/backbones/vision_transformer.py
  44. 11 6
      paddlers/models/ppseg/models/backbones/xception_deeplab.py
  45. 13 11
      paddlers/models/ppseg/models/bisenet.py
  46. 3 3
      paddlers/models/ppseg/models/bisenetv1.py
  47. 174 0
      paddlers/models/ppseg/models/ccnet.py
  48. 3 3
      paddlers/models/ppseg/models/danet.py
  49. 403 0
      paddlers/models/ppseg/models/ddrnet.py
  50. 5 5
      paddlers/models/ppseg/models/decoupled_segnet.py
  51. 3 3
      paddlers/models/ppseg/models/deeplab.py
  52. 3 3
      paddlers/models/ppseg/models/dmnet.py
  53. 3 3
      paddlers/models/ppseg/models/dnlnet.py
  54. 6 4
      paddlers/models/ppseg/models/emanet.py
  55. 3 3
      paddlers/models/ppseg/models/encnet.py
  56. 6 4
      paddlers/models/ppseg/models/enet.py
  57. 3 3
      paddlers/models/ppseg/models/espnet.py
  58. 3 3
      paddlers/models/ppseg/models/espnetv1.py
  59. 16 6
      paddlers/models/ppseg/models/fast_scnn.py
  60. 3 3
      paddlers/models/ppseg/models/fastfcn.py
  61. 3 3
      paddlers/models/ppseg/models/fcn.py
  62. 3 3
      paddlers/models/ppseg/models/gcnet.py
  63. 4 4
      paddlers/models/ppseg/models/ginet.py
  64. 198 0
      paddlers/models/ppseg/models/glore.py
  65. 5 5
      paddlers/models/ppseg/models/gscnn.py
  66. 6 4
      paddlers/models/ppseg/models/hardnet.py
  67. 3 3
      paddlers/models/ppseg/models/hrnet_contrast.py
  68. 3 3
      paddlers/models/ppseg/models/isanet.py
  69. 2 1
      paddlers/models/ppseg/models/layers/__init__.py
  70. 1 1
      paddlers/models/ppseg/models/layers/activation.py
  71. 127 1
      paddlers/models/ppseg/models/layers/attention.py
  72. 58 1
      paddlers/models/ppseg/models/layers/layer_libs.py
  73. 1 1
      paddlers/models/ppseg/models/layers/nonlocal2d.py
  74. 1 1
      paddlers/models/ppseg/models/layers/pyramid_pool.py
  75. 285 0
      paddlers/models/ppseg/models/layers/tensor_fusion.py
  76. 133 0
      paddlers/models/ppseg/models/layers/tensor_fusion_helper.py
  77. 2 2
      paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py
  78. 1 1
      paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py
  79. 2 4
      paddlers/models/ppseg/models/losses/cross_entropy_loss.py
  80. 2 2
      paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py
  81. 2 2
      paddlers/models/ppseg/models/losses/detail_aggregate_loss.py
  82. 44 23
      paddlers/models/ppseg/models/losses/dice_loss.py
  83. 2 2
      paddlers/models/ppseg/models/losses/edge_attention_loss.py
  84. 98 26
      paddlers/models/ppseg/models/losses/focal_loss.py
  85. 1 1
      paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py
  86. 1 1
      paddlers/models/ppseg/models/losses/kl_loss.py
  87. 23 1
      paddlers/models/ppseg/models/losses/l1_loss.py
  88. 13 5
      paddlers/models/ppseg/models/losses/lovasz_loss.py
  89. 1 1
      paddlers/models/ppseg/models/losses/mean_square_error_loss.py
  90. 1 1
      paddlers/models/ppseg/models/losses/mixed_loss.py
  91. 2 2
      paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py
  92. 2 2
      paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py
  93. 5 2
      paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py
  94. 1 1
      paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py
  95. 1 1
      paddlers/models/ppseg/models/losses/rmi_loss.py
  96. 8 6
      paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py
  97. 1 1
      paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py
  98. 162 0
      paddlers/models/ppseg/models/lraspp.py
  99. 4 4
      paddlers/models/ppseg/models/mla_transformer.py
  100. 289 0
      paddlers/models/ppseg/models/mobileseg.py

+ 1 - 1
paddlers/models/ppseg/__init__.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 15 - 92
paddlers/models/ppseg/core/infer.py

@@ -21,88 +21,16 @@ import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 
 
-def get_reverse_list(ori_shape, transforms):
-    """
-    get reverse list of transform.
-
-    Args:
-        ori_shape (list): Origin shape of image.
-        transforms (list): List of transform.
-
-    Returns:
-        list: List of tuple, there are two format:
-            ('resize', (h, w)) The image shape before resize,
-            ('padding', (h, w)) The image shape before padding.
-    """
-    reverse_list = []
-    h, w = ori_shape[0], ori_shape[1]
-    for op in transforms:
-        if op.__class__.__name__ in ['Resize']:
-            reverse_list.append(('resize', (h, w)))
-            h, w = op.target_size[0], op.target_size[1]
-        if op.__class__.__name__ in ['ResizeByLong']:
-            reverse_list.append(('resize', (h, w)))
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            short_edge = int(round(short_edge * op.long_size / long_edge))
-            long_edge = op.long_size
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-        if op.__class__.__name__ in ['ResizeByShort']:
-            reverse_list.append(('resize', (h, w)))
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            long_edge = int(round(long_edge * op.short_size / short_edge))
-            short_edge = op.short_size
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-        if op.__class__.__name__ in ['Pad']:
-            reverse_list.append(('padding', (h, w)))
-            w, h = op.target_size[0], op.target_size[1]
-        if op.__class__.__name__ in ['PadByAspectRatio']:
-            reverse_list.append(('padding', (h, w)))
-            ratio = w / h
-            if ratio == op.aspect_ratio:
-                pass
-            elif ratio > op.aspect_ratio:
-                h = int(w / op.aspect_ratio)
-            else:
-                w = int(h * op.aspect_ratio)
-        if op.__class__.__name__ in ['LimitLong']:
-            long_edge = max(h, w)
-            short_edge = min(h, w)
-            if ((op.max_long is not None) and (long_edge > op.max_long)):
-                reverse_list.append(('resize', (h, w)))
-                long_edge = op.max_long
-                short_edge = int(round(short_edge * op.max_long / long_edge))
-            elif ((op.min_long is not None) and (long_edge < op.min_long)):
-                reverse_list.append(('resize', (h, w)))
-                long_edge = op.min_long
-                short_edge = int(round(short_edge * op.min_long / long_edge))
-            if h > w:
-                h = long_edge
-                w = short_edge
-            else:
-                w = long_edge
-                h = short_edge
-    return reverse_list
-
-
-def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
+def reverse_transform(pred, trans_info, mode='nearest'):
     """recover pred to origin shape"""
     """recover pred to origin shape"""
-    reverse_list = get_reverse_list(ori_shape, transforms)
     intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
     intTypeList = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
     dtype = pred.dtype
     dtype = pred.dtype
-    for item in reverse_list[::-1]:
-        if item[0] == 'resize':
+    for item in trans_info[::-1]:
+        if isinstance(item[0], list):
+            trans_mode = item[0][0]
+        else:
+            trans_mode = item[0]
+        if trans_mode == 'resize':
             h, w = item[1][0], item[1][1]
             h, w = item[1][0], item[1][1]
             if paddle.get_device() == 'cpu' and dtype in intTypeList:
             if paddle.get_device() == 'cpu' and dtype in intTypeList:
                 pred = paddle.cast(pred, 'float32')
                 pred = paddle.cast(pred, 'float32')
@@ -110,7 +38,7 @@ def reverse_transform(pred, ori_shape, transforms, mode='nearest'):
                 pred = paddle.cast(pred, dtype)
                 pred = paddle.cast(pred, dtype)
             else:
             else:
                 pred = F.interpolate(pred, (h, w), mode=mode)
                 pred = F.interpolate(pred, (h, w), mode=mode)
-        elif item[0] == 'padding':
+        elif trans_mode == 'padding':
             h, w = item[1][0], item[1][1]
             h, w = item[1][0], item[1][1]
             pred = pred[:, :, 0:h, 0:w]
             pred = pred[:, :, 0:h, 0:w]
         else:
         else:
@@ -205,8 +133,7 @@ def slide_inference(model, im, crop_size, stride):
 
 
 def inference(model,
 def inference(model,
               im,
               im,
-              ori_shape=None,
-              transforms=None,
+              trans_info=None,
               is_slide=False,
               is_slide=False,
               stride=None,
               stride=None,
               crop_size=None):
               crop_size=None):
@@ -216,8 +143,7 @@ def inference(model,
     Args:
     Args:
         model (paddle.nn.Layer): model to get logits of image.
         model (paddle.nn.Layer): model to get logits of image.
         im (Tensor): the input image.
         im (Tensor): the input image.
-        ori_shape (list): Origin shape of image.
-        transforms (list): Transforms for image.
+        trans_info (list): Record of the image shape changes applied by the transforms. Default: None.
         is_slide (bool): Whether to infer by sliding window. Default: False.
         is_slide (bool): Whether to infer by sliding window. Default: False.
         crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
         crop_size (tuple|list). The size of sliding window, (w, h). It should be provided if is_slide is True.
         stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
         stride (tuple|list). The size of stride, (w, h). It should be provided if is_slide is True.
@@ -239,8 +165,8 @@ def inference(model,
         logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
         logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
     if hasattr(model, 'data_format') and model.data_format == 'NHWC':
     if hasattr(model, 'data_format') and model.data_format == 'NHWC':
         logit = logit.transpose((0, 3, 1, 2))
         logit = logit.transpose((0, 3, 1, 2))
-    if ori_shape is not None:
-        logit = reverse_transform(logit, ori_shape, transforms, mode='bilinear')
+    if trans_info is not None:
+        logit = reverse_transform(logit, trans_info, mode='bilinear')
         pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
         pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
         return pred, logit
         return pred, logit
     else:
     else:
@@ -249,8 +175,7 @@ def inference(model,
 
 
 def aug_inference(model,
 def aug_inference(model,
                   im,
                   im,
-                  ori_shape,
-                  transforms,
+                  trans_info,
                   scales=1.0,
                   scales=1.0,
                   flip_horizontal=False,
                   flip_horizontal=False,
                   flip_vertical=False,
                   flip_vertical=False,
@@ -263,8 +188,7 @@ def aug_inference(model,
     Args:
     Args:
         model (paddle.nn.Layer): model to get logits of image.
         model (paddle.nn.Layer): model to get logits of image.
         im (Tensor): the input image.
         im (Tensor): the input image.
-        ori_shape (list): Origin shape of image.
-        transforms (list): Transforms for image.
+        trans_info (list): Record of the image shape changes applied by the transforms.
         scales (float|tuple|list):  Scales for resize. Default: 1.
         scales (float|tuple|list):  Scales for resize. Default: 1.
         flip_horizontal (bool): Whether to flip horizontally. Default: False.
         flip_horizontal (bool): Whether to flip horizontally. Default: False.
         flip_vertical (bool): Whether to flip vertically. Default: False.
         flip_vertical (bool): Whether to flip vertically. Default: False.
@@ -302,8 +226,7 @@ def aug_inference(model,
             logit = F.softmax(logit, axis=1)
             logit = F.softmax(logit, axis=1)
             final_logit = final_logit + logit
             final_logit = final_logit + logit
 
 
-    final_logit = reverse_transform(
-        final_logit, ori_shape, transforms, mode='bilinear')
+    final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
     pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
     pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
 
 
     return pred, final_logit
     return pred, final_logit

+ 17 - 19
paddlers/models/ppseg/core/predict.py

@@ -19,9 +19,9 @@ import cv2
 import numpy as np
 import numpy as np
 import paddle
 import paddle
 
 
-from paddlers.models.ppseg import utils
-from paddlers.models.ppseg.core import infer
-from paddlers.models.ppseg.utils import logger, progbar, visualize
+from paddleseg import utils
+from paddleseg.core import infer
+from paddleseg.utils import logger, progbar, visualize
 
 
 
 
 def mkdir(path):
 def mkdir(path):
@@ -36,6 +36,15 @@ def partition_list(arr, m):
     return [arr[i:i + n] for i in range(0, len(arr), n)]
     return [arr[i:i + n] for i in range(0, len(arr), n)]
 
 
 
 
+def preprocess(im_path, transforms):
+    data = {}
+    data['img'] = im_path
+    data = transforms(data)
+    data['img'] = data['img'][np.newaxis, ...]
+    data['img'] = paddle.to_tensor(data['img'])
+    return data
+
+
 def predict(model,
 def predict(model,
             model_path,
             model_path,
             transforms,
             transforms,
@@ -89,18 +98,13 @@ def predict(model,
     color_map = visualize.get_color_map_list(256, custom_color=custom_color)
     color_map = visualize.get_color_map_list(256, custom_color=custom_color)
     with paddle.no_grad():
     with paddle.no_grad():
         for i, im_path in enumerate(img_lists[local_rank]):
         for i, im_path in enumerate(img_lists[local_rank]):
-            im = cv2.imread(im_path)
-            ori_shape = im.shape[:2]
-            im, _ = transforms(im)
-            im = im[np.newaxis, ...]
-            im = paddle.to_tensor(im)
+            data = preprocess(im_path, transforms)
 
 
             if aug_pred:
             if aug_pred:
                 pred, _ = infer.aug_inference(
                 pred, _ = infer.aug_inference(
                     model,
                     model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=transforms.transforms,
+                    data['img'],
+                    trans_info=data['trans_info'],
                     scales=scales,
                     scales=scales,
                     flip_horizontal=flip_horizontal,
                     flip_horizontal=flip_horizontal,
                     flip_vertical=flip_vertical,
                     flip_vertical=flip_vertical,
@@ -110,9 +114,8 @@ def predict(model,
             else:
             else:
                 pred, _ = infer.inference(
                 pred, _ = infer.inference(
                     model,
                     model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=transforms.transforms,
+                    data['img'],
+                    trans_info=data['trans_info'],
                     is_slide=is_slide,
                     is_slide=is_slide,
                     stride=stride,
                     stride=stride,
                     crop_size=crop_size)
                     crop_size=crop_size)
@@ -141,9 +144,4 @@ def predict(model,
             mkdir(pred_saved_path)
             mkdir(pred_saved_path)
             pred_mask.save(pred_saved_path)
             pred_mask.save(pred_saved_path)
 
 
-            # pred_im = utils.visualize(im_path, pred, weight=0.0)
-            # pred_saved_path = os.path.join(pred_saved_dir, im_file)
-            # mkdir(pred_saved_path)
-            # cv2.imwrite(pred_saved_path, pred_im)
-
             progbar_pred.update(i + 1)
             progbar_pred.update(i + 1)

+ 43 - 35
paddlers/models/ppseg/core/train.py

@@ -20,10 +20,9 @@ import shutil
 import paddle
 import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.utils import (TimeAverager, calculate_eta, resume,
-                                         logger, worker_init_fn, train_profiler,
-                                         op_flops_funs)
-from paddlers.models.ppseg.core.val import evaluate
+from paddleseg.utils import (TimeAverager, calculate_eta, resume, logger,
+                             worker_init_fn, train_profiler, op_flops_funs)
+from paddleseg.core.val import evaluate
 
 
 
 
 def check_logits_losses(logits_list, losses):
 def check_logits_losses(logits_list, losses):
@@ -35,17 +34,15 @@ def check_logits_losses(logits_list, losses):
             .format(len_logits, len_losses))
             .format(len_logits, len_losses))
 
 
 
 
-def loss_computation(logits_list, labels, losses, edges=None):
+def loss_computation(logits_list, labels, edges, losses):
     check_logits_losses(logits_list, losses)
     check_logits_losses(logits_list, losses)
     loss_list = []
     loss_list = []
     for i in range(len(logits_list)):
     for i in range(len(logits_list)):
         logits = logits_list[i]
         logits = logits_list[i]
         loss_i = losses['types'][i]
         loss_i = losses['types'][i]
         coef_i = losses['coef'][i]
         coef_i = losses['coef'][i]
-
-        if loss_i.__class__.__name__ in ('BCELoss', 'FocalLoss'
-                                         ) and loss_i.edge_label:
-            # If use edges as labels According to loss type.
+        if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
+            # Use edges as labels, according to the loss type.
             loss_list.append(coef_i * loss_i(logits, edges))
             loss_list.append(coef_i * loss_i(logits, edges))
         elif loss_i.__class__.__name__ == 'MixedLoss':
         elif loss_i.__class__.__name__ == 'MixedLoss':
             mixed_loss_list = loss_i(logits, labels)
             mixed_loss_list = loss_i(logits, labels)
@@ -75,13 +72,14 @@ def train(model,
           keep_checkpoint_max=5,
           keep_checkpoint_max=5,
           test_config=None,
           test_config=None,
           precision='fp32',
           precision='fp32',
+          amp_level='O1',
           profiler_options=None,
           profiler_options=None,
           to_static_training=False):
           to_static_training=False):
     """
     """
     Launch training.
     Launch training.
 
 
     Args:
     Args:
-        model(nn.Layer): A sementic segmentation model.
+        model(nn.Layer): A semantic segmentation model.
         train_dataset (paddle.io.Dataset): Used to read and process training datasets.
         train_dataset (paddle.io.Dataset): Used to read and process training datasets.
         val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
         val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
         optimizer (paddle.optimizer.Optimizer): The optimizer.
         optimizer (paddle.optimizer.Optimizer): The optimizer.
@@ -98,6 +96,9 @@ def train(model,
         keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
         keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
         test_config(dict, optional): Evaluation config.
         test_config(dict, optional): Evaluation config.
         precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
         precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the training is normal.
+        amp_level (str, optional): Auto mixed precision level. Accepted values are "O1" and "O2". O1 represents mixed precision:
+            the input data type of each operator is cast according to white_list and black_list. O2 represents pure fp16: all operator
+            parameters and input data are cast to fp16, except for operators in black_list, operators without an fp16 kernel, and batchnorm. Default: "O1".
         profiler_options (str, optional): The option of train profiler.
         profiler_options (str, optional): The option of train profiler.
         to_static_training (bool, optional): Whether to use @to_static for training.
         to_static_training (bool, optional): Whether to use @to_static for training.
     """
     """
@@ -112,7 +113,18 @@ def train(model,
     if not os.path.isdir(save_dir):
     if not os.path.isdir(save_dir):
         if os.path.exists(save_dir):
         if os.path.exists(save_dir):
             os.remove(save_dir)
             os.remove(save_dir)
-        os.makedirs(save_dir)
+        os.makedirs(save_dir, exist_ok=True)
+
+    # use amp
+    if precision == 'fp16':
+        logger.info('use AMP to train. AMP level = {}'.format(amp_level))
+        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
+        if amp_level == 'O2':
+            model, optimizer = paddle.amp.decorate(
+                models=model,
+                optimizers=optimizer,
+                level='O2',
+                save_dtype='float32')
 
 
     if nranks > 1:
     if nranks > 1:
         paddle.distributed.fleet.init(is_collective=True)
         paddle.distributed.fleet.init(is_collective=True)
@@ -130,18 +142,13 @@ def train(model,
         return_list=True,
         return_list=True,
         worker_init_fn=worker_init_fn, )
         worker_init_fn=worker_init_fn, )
 
 
-    # use amp
-    if precision == 'fp16':
-        logger.info('use amp to train')
-        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
-
     if use_vdl:
     if use_vdl:
         from visualdl import LogWriter
         from visualdl import LogWriter
         log_writer = LogWriter(save_dir)
         log_writer = LogWriter(save_dir)
 
 
     if to_static_training:
     if to_static_training:
         model = paddle.jit.to_static(model)
         model = paddle.jit.to_static(model)
-        logger.info("Successfully to apply @to_static")
+        logger.info("Successfully applied @to_static")
 
 
     avg_loss = 0.0
     avg_loss = 0.0
     avg_loss_list = []
     avg_loss_list = []
@@ -164,30 +171,29 @@ def train(model,
                 else:
                 else:
                     break
                     break
             reader_cost_averager.record(time.time() - batch_start)
             reader_cost_averager.record(time.time() - batch_start)
-            images = data[0]
-            labels = data[1].astype('int64')
+            images = data['img']
+            labels = data['label'].astype('int64')
             edges = None
             edges = None
-            if len(data) == 3:
-                edges = data[2].astype('int64')
+            if 'edge' in data.keys():
+                edges = data['edge'].astype('int64')
             if hasattr(model, 'data_format') and model.data_format == 'NHWC':
             if hasattr(model, 'data_format') and model.data_format == 'NHWC':
                 images = images.transpose((0, 2, 3, 1))
                 images = images.transpose((0, 2, 3, 1))
 
 
             if precision == 'fp16':
             if precision == 'fp16':
                 with paddle.amp.auto_cast(
                 with paddle.amp.auto_cast(
+                        level=amp_level,
                         enable=True,
                         enable=True,
                         custom_white_list={
                         custom_white_list={
                             "elementwise_add", "batch_norm", "sync_batch_norm"
                             "elementwise_add", "batch_norm", "sync_batch_norm"
                         },
                         },
                         custom_black_list={'bilinear_interp_v2'}):
                         custom_black_list={'bilinear_interp_v2'}):
-                    if nranks > 1:
-                        logits_list = ddp_model(images)
-                    else:
-                        logits_list = model(images)
+                    logits_list = ddp_model(images) if nranks > 1 else model(
+                        images)
                     loss_list = loss_computation(
                     loss_list = loss_computation(
                         logits_list=logits_list,
                         logits_list=logits_list,
                         labels=labels,
                         labels=labels,
-                        losses=losses,
-                        edges=edges)
+                        edges=edges,
+                        losses=losses)
                     loss = sum(loss_list)
                     loss = sum(loss_list)
 
 
                 scaled = scaler.scale(loss)  # scale the loss
                 scaled = scaler.scale(loss)  # scale the loss
@@ -197,15 +203,12 @@ def train(model,
                 else:
                 else:
                     scaler.minimize(optimizer, scaled)  # update parameters
                     scaler.minimize(optimizer, scaled)  # update parameters
             else:
             else:
-                if nranks > 1:
-                    logits_list = ddp_model(images)
-                else:
-                    logits_list = model(images)
+                logits_list = ddp_model(images) if nranks > 1 else model(images)
                 loss_list = loss_computation(
                 loss_list = loss_computation(
                     logits_list=logits_list,
                     logits_list=logits_list,
                     labels=labels,
                     labels=labels,
-                    losses=losses,
-                    edges=edges)
+                    edges=edges,
+                    losses=losses)
                 loss = sum(loss_list)
                 loss = sum(loss_list)
                 loss.backward()
                 loss.backward()
                 # If the optimizer is ReduceOnPlateau, the loss is the one that has been passed into step.
                 # If the optimizer is ReduceOnPlateau, the loss is the one that has been passed into step.
@@ -278,7 +281,12 @@ def train(model,
                     test_config = {}
                     test_config = {}
 
 
                 mean_iou, acc, _, _, _ = evaluate(
                 mean_iou, acc, _, _, _ = evaluate(
-                    model, val_dataset, num_workers=num_workers, **test_config)
+                    model,
+                    val_dataset,
+                    num_workers=num_workers,
+                    precision=precision,
+                    amp_level=amp_level,
+                    **test_config)
 
 
                 model.train()
                 model.train()
 
 
@@ -314,7 +322,7 @@ def train(model,
             batch_start = time.time()
             batch_start = time.time()
 
 
     # Calculate flops.
     # Calculate flops.
-    if local_rank == 0:
+    if local_rank == 0 and not (precision == 'fp16' and amp_level == 'O2'):
         _, c, h, w = images.shape
         _, c, h, w = images.shape
         _ = paddle.flops(
         _ = paddle.flops(
             model, [1, c, h, w],
             model, [1, c, h, w],

+ 73 - 33
paddlers/models/ppseg/core/val.py

@@ -19,8 +19,8 @@ import time
 import paddle
 import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
-from paddlers.models.ppseg.core import infer
+from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
+from paddleseg.core import infer
 
 
 np.set_printoptions(suppress=True)
 np.set_printoptions(suppress=True)
 
 
@@ -34,6 +34,8 @@ def evaluate(model,
              is_slide=False,
              is_slide=False,
              stride=None,
              stride=None,
              crop_size=None,
              crop_size=None,
+             precision='fp32',
+             amp_level='O1',
              num_workers=0,
              num_workers=0,
              print_detail=True,
              print_detail=True,
              auc_roc=False):
              auc_roc=False):
@@ -41,7 +43,7 @@ def evaluate(model,
     Launch evaluation.
     Launch evaluation.
 
 
     Args:
     Args:
-        model(nn.Layer): A sementic segmentation model.
+        model(nn.Layer): A semantic segmentation model.
         eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
         eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
         aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
         aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
         scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
         scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
@@ -52,6 +54,8 @@ def evaluate(model,
             It should be provided when `is_slide` is True.
             It should be provided when `is_slide` is True.
         crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
         crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
             It should be provided when `is_slide` is True.
             It should be provided when `is_slide` is True.
+        precision (str, optional): Use AMP if precision='fp16'. If precision='fp32', the evaluation is normal.
+        amp_level (str, optional): Auto mixed precision level. Accepted values are “O1” and “O2”: O1 represent mixed precision, the input data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators parameters and input data will be casted to fp16, except operators in black_list, don’t support fp16 kernel and batchnorm. Default is O1(amp)
         num_workers (int, optional): Num workers for data loader. Default: 0.
         num_workers (int, optional): Num workers for data loader. Default: 0.
         print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
         print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
         auc_roc(bool, optional): whether add auc_roc metric
         auc_roc(bool, optional): whether add auc_roc metric
@@ -93,32 +97,66 @@ def evaluate(model,
     batch_cost_averager = TimeAverager()
     batch_cost_averager = TimeAverager()
     batch_start = time.time()
     batch_start = time.time()
     with paddle.no_grad():
     with paddle.no_grad():
-        for iter, (im, label) in enumerate(loader):
+        for iter, data in enumerate(loader):
             reader_cost_averager.record(time.time() - batch_start)
             reader_cost_averager.record(time.time() - batch_start)
-            label = label.astype('int64')
+            label = data['label'].astype('int64')
 
 
-            ori_shape = label.shape[-2:]
             if aug_eval:
             if aug_eval:
-                pred, logits = infer.aug_inference(
-                    model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=eval_dataset.transforms.transforms,
-                    scales=scales,
-                    flip_horizontal=flip_horizontal,
-                    flip_vertical=flip_vertical,
-                    is_slide=is_slide,
-                    stride=stride,
-                    crop_size=crop_size)
+                if precision == 'fp16':
+                    with paddle.amp.auto_cast(
+                            level=amp_level,
+                            enable=True,
+                            custom_white_list={
+                                "elementwise_add", "batch_norm",
+                                "sync_batch_norm"
+                            },
+                            custom_black_list={'bilinear_interp_v2'}):
+                        pred, logits = infer.aug_inference(
+                            model,
+                            data['img'],
+                            trans_info=data['trans_info'],
+                            scales=scales,
+                            flip_horizontal=flip_horizontal,
+                            flip_vertical=flip_vertical,
+                            is_slide=is_slide,
+                            stride=stride,
+                            crop_size=crop_size)
+                else:
+                    pred, logits = infer.aug_inference(
+                        model,
+                        data['img'],
+                        trans_info=data['trans_info'],
+                        scales=scales,
+                        flip_horizontal=flip_horizontal,
+                        flip_vertical=flip_vertical,
+                        is_slide=is_slide,
+                        stride=stride,
+                        crop_size=crop_size)
             else:
             else:
-                pred, logits = infer.inference(
-                    model,
-                    im,
-                    ori_shape=ori_shape,
-                    transforms=eval_dataset.transforms.transforms,
-                    is_slide=is_slide,
-                    stride=stride,
-                    crop_size=crop_size)
+                if precision == 'fp16':
+                    with paddle.amp.auto_cast(
+                            level=amp_level,
+                            enable=True,
+                            custom_white_list={
+                                "elementwise_add", "batch_norm",
+                                "sync_batch_norm"
+                            },
+                            custom_black_list={'bilinear_interp_v2'}):
+                        pred, logits = infer.inference(
+                            model,
+                            data['img'],
+                            trans_info=data['trans_info'],
+                            is_slide=is_slide,
+                            stride=stride,
+                            crop_size=crop_size)
+                else:
+                    pred, logits = infer.inference(
+                        model,
+                        data['img'],
+                        trans_info=data['trans_info'],
+                        is_slide=is_slide,
+                        stride=stride,
+                        crop_size=crop_size)
 
 
             intersect_area, pred_area, label_area = metrics.calculate_area(
             intersect_area, pred_area, label_area = metrics.calculate_area(
                 pred,
                 pred,
@@ -175,12 +213,12 @@ def evaluate(model,
             batch_cost_averager.reset()
             batch_cost_averager.reset()
             batch_start = time.time()
             batch_start = time.time()
 
 
-    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
-                                       label_area_all)
-    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
-    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
-    class_dice, mdice = metrics.dice(intersect_area_all, pred_area_all,
-                                     label_area_all)
+    metrics_input = (intersect_area_all, pred_area_all, label_area_all)
+    class_iou, miou = metrics.mean_iou(*metrics_input)
+    acc, class_precision, class_recall = metrics.class_measurement(
+        *metrics_input)
+    kappa = metrics.kappa(*metrics_input)
+    class_dice, mdice = metrics.dice(*metrics_input)
 
 
     if auc_roc:
     if auc_roc:
         auc_roc = metrics.auc_roc(
         auc_roc = metrics.auc_roc(
@@ -193,5 +231,7 @@ def evaluate(model,
         infor = infor + auc_infor if auc_roc else infor
         infor = infor + auc_infor if auc_roc else infor
         logger.info(infor)
         logger.info(infor)
         logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
         logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
-        logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
-    return miou, acc, class_iou, class_acc, kappa
+        logger.info("[EVAL] Class Precision: \n" + str(
+            np.round(class_precision, 4)))
+        logger.info("[EVAL] Class Recall: \n" + str(np.round(class_recall, 4)))
+    return miou, acc, class_iou, class_precision, kappa

+ 2 - 2
paddlers/models/ppseg/cvlibs/callbacks.py

@@ -19,8 +19,8 @@ import numpy as np
 import paddle
 import paddle
 from paddle.distributed.parallel import ParallelEnv
 from paddle.distributed.parallel import ParallelEnv
 from visualdl import LogWriter
 from visualdl import LogWriter
-from paddlers.models.ppseg.utils.progbar import Progbar
-import paddlers.models.ppseg.utils.logger as logger
+from paddleseg.utils.progbar import Progbar
+import paddleseg.utils.logger as logger
 
 
 
 
 class CallbackList(object):
 class CallbackList(object):

+ 179 - 32
paddlers/models/ppseg/cvlibs/config.py

@@ -15,12 +15,15 @@
 import codecs
 import codecs
 import os
 import os
 from typing import Any, Dict, Generic
 from typing import Any, Dict, Generic
+import warnings
+from ast import literal_eval
 
 
 import paddle
 import paddle
 import yaml
 import yaml
+import six
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import logger
+from paddleseg.cvlibs import manager
+from paddleseg.utils import logger
 
 
 
 
 class Config(object):
 class Config(object):
@@ -51,7 +54,7 @@ class Config(object):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.cvlibs.config import Config
+        from paddleseg.cvlibs.config import Config
 
 
         # Create a cfg object with yaml file path.
         # Create a cfg object with yaml file path.
         cfg = Config(yaml_cfg_path)
         cfg = Config(yaml_cfg_path)
@@ -69,7 +72,8 @@ class Config(object):
                  path: str,
                  path: str,
                  learning_rate: float=None,
                  learning_rate: float=None,
                  batch_size: int=None,
                  batch_size: int=None,
-                 iters: int=None):
+                 iters: int=None,
+                 opts: list=None):
         if not path:
         if not path:
             raise ValueError('Please specify the configuration file path.')
             raise ValueError('Please specify the configuration file path.')
 
 
@@ -84,7 +88,18 @@ class Config(object):
             raise RuntimeError('Config file should in yaml format!')
             raise RuntimeError('Config file should in yaml format!')
 
 
         self.update(
         self.update(
-            learning_rate=learning_rate, batch_size=batch_size, iters=iters)
+            learning_rate=learning_rate,
+            batch_size=batch_size,
+            iters=iters,
+            opts=opts)
+
+        model_cfg = self.dic.get('model', None)
+        if model_cfg is None:
+            raise RuntimeError('No model specified in the configuration file.')
+        if (not self.train_dataset_config) and (not self.val_dataset_config):
+            raise ValueError(
+                'One of `train_dataset` or `val_dataset should be given, but there are none.'
+            )
 
 
     def _update_dic(self, dic, base_dic):
     def _update_dic(self, dic, base_dic):
         """
         """
@@ -121,7 +136,8 @@ class Config(object):
     def update(self,
     def update(self,
                learning_rate: float=None,
                learning_rate: float=None,
                batch_size: int=None,
                batch_size: int=None,
-               iters: int=None):
+               iters: int=None,
+               opts: list=None):
         '''Update config'''
         '''Update config'''
         if learning_rate:
         if learning_rate:
             if 'lr_scheduler' in self.dic:
             if 'lr_scheduler' in self.dic:
@@ -135,6 +151,27 @@ class Config(object):
         if iters:
         if iters:
             self.dic['iters'] = iters
             self.dic['iters'] = iters
 
 
+        # fix parameters by --opts of command
+        if opts is not None:
+            if len(opts) % 2 != 0 or len(opts) == 0:
+                raise ValueError(
+                    "Command line options config `--opts` format error! It should be even length like: k1 v1 k2 v2 ... Please check it: {}".
+                    format(opts))
+            for key, value in zip(opts[0::2], opts[1::2]):
+                if isinstance(value, six.string_types):
+                    try:
+                        value = literal_eval(value)
+                    except ValueError:
+                        pass
+                    except SyntaxError:
+                        pass
+                key_list = key.split('.')
+                dic = self.dic
+                for subkey in key_list[:-1]:
+                    dic.setdefault(subkey, dict())
+                    dic = dic[subkey]
+                dic[key_list[-1]] = value
+
     @property
     @property
     def batch_size(self) -> int:
     def batch_size(self) -> int:
         return self.dic.get('batch_size', 1)
         return self.dic.get('batch_size', 1)
@@ -153,13 +190,32 @@ class Config(object):
                 'No `lr_scheduler` specified in the configuration file.')
                 'No `lr_scheduler` specified in the configuration file.')
         params = self.dic.get('lr_scheduler')
         params = self.dic.get('lr_scheduler')
 
 
+        use_warmup = False
+        if 'warmup_iters' in params:
+            use_warmup = True
+            warmup_iters = params.pop('warmup_iters')
+            assert 'warmup_start_lr' in params, \
+                "When use warmup, please set warmup_start_lr and warmup_iters in lr_scheduler"
+            warmup_start_lr = params.pop('warmup_start_lr')
+            end_lr = params['learning_rate']
+
         lr_type = params.pop('type')
         lr_type = params.pop('type')
         if lr_type == 'PolynomialDecay':
         if lr_type == 'PolynomialDecay':
-            params.setdefault('decay_steps', self.iters)
+            iters = self.iters - warmup_iters if use_warmup else self.iters
+            iters = max(iters, 1)
+            params.setdefault('decay_steps', iters)
             params.setdefault('end_lr', 0)
             params.setdefault('end_lr', 0)
             params.setdefault('power', 0.9)
             params.setdefault('power', 0.9)
+        lr_sche = getattr(paddle.optimizer.lr, lr_type)(**params)
+
+        if use_warmup:
+            lr_sche = paddle.optimizer.lr.LinearWarmup(
+                learning_rate=lr_sche,
+                warmup_steps=warmup_iters,
+                start_lr=warmup_start_lr,
+                end_lr=end_lr)
 
 
-        return getattr(paddle.optimizer.lr, lr_type)(**params)
+        return lr_sche
 
 
     @property
     @property
     def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
     def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
@@ -202,15 +258,33 @@ class Config(object):
         args = self.optimizer_args
         args = self.optimizer_args
         optimizer_type = args.pop('type')
         optimizer_type = args.pop('type')
 
 
+        params = self.model.parameters()
+        if 'backbone_lr_mult' in args:
+            if not hasattr(self.model, 'backbone'):
+                logger.warning('The backbone_lr_mult is not effective because'
+                               ' the model does not have backbone')
+            else:
+                backbone_lr_mult = args.pop('backbone_lr_mult')
+                backbone_params = self.model.backbone.parameters()
+                backbone_params_id = [id(x) for x in backbone_params]
+                other_params = [
+                    x for x in params if id(x) not in backbone_params_id
+                ]
+                params = [{
+                    'params': backbone_params,
+                    'learning_rate': backbone_lr_mult
+                }, {
+                    'params': other_params
+                }]
+
         if optimizer_type == 'sgd':
         if optimizer_type == 'sgd':
-            return paddle.optimizer.Momentum(
-                lr, parameters=self.model.parameters(), **args)
+            return paddle.optimizer.Momentum(lr, parameters=params, **args)
         elif optimizer_type == 'adam':
         elif optimizer_type == 'adam':
-            return paddle.optimizer.Adam(
-                lr, parameters=self.model.parameters(), **args)
+            return paddle.optimizer.Adam(lr, parameters=params, **args)
         elif optimizer_type in paddle.optimizer.__all__:
         elif optimizer_type in paddle.optimizer.__all__:
-            return getattr(paddle.optimizer, optimizer_type)(
-                lr, parameters=self.model.parameters(), **args)
+            return getattr(paddle.optimizer, optimizer_type)(lr,
+                                                             parameters=params,
+                                                             **args)
 
 
         raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
         raise RuntimeError('Unknown optimizer type {}.'.format(optimizer_type))
 
 
@@ -295,24 +369,6 @@ class Config(object):
     @property
     @property
     def model(self) -> paddle.nn.Layer:
     def model(self) -> paddle.nn.Layer:
         model_cfg = self.dic.get('model').copy()
         model_cfg = self.dic.get('model').copy()
-        if not model_cfg:
-            raise RuntimeError('No model specified in the configuration file.')
-        if not 'num_classes' in model_cfg:
-            num_classes = None
-            if self.train_dataset_config:
-                if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
-                    num_classes = self.train_dataset_class.NUM_CLASSES
-                elif hasattr(self.train_dataset, 'num_classes'):
-                    num_classes = self.train_dataset.num_classes
-            elif self.val_dataset_config:
-                if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
-                    num_classes = self.val_dataset_class.NUM_CLASSES
-                elif hasattr(self.val_dataset, 'num_classes'):
-                    num_classes = self.val_dataset.num_classes
-
-            if num_classes is not None:
-                model_cfg['num_classes'] = num_classes
-
         if not self._model:
         if not self._model:
             self._model = self._load_object(model_cfg)
             self._model = self._load_object(model_cfg)
         return self._model
         return self._model
@@ -401,3 +457,94 @@ class Config(object):
 
 
     def __str__(self) -> str:
     def __str__(self) -> str:
         return yaml.dump(self.dic)
         return yaml.dump(self.dic)
+
+    @property
+    def val_transforms(self) -> list:
+        """Get val_transform from val_dataset"""
+        _val_dataset = self.val_dataset_config
+        if not _val_dataset:
+            return []
+        _transforms = _val_dataset.get('transforms', [])
+        transforms = []
+        for i in _transforms:
+            transforms.append(self._load_object(i))
+        return transforms
+
+    def check_sync_info(self) -> None:
+        """
+        Check and sync the info, such as num_classes and img_channels, 
+        between the config of model, train_dataset and val_dataset.
+        """
+        self._check_sync_num_classes()
+        self._check_sync_img_channels()
+
+    def _check_sync_num_classes(self):
+        num_classes_set = set()
+
+        if self.dic['model'].get('num_classes', None) is not None:
+            num_classes_set.add(self.dic['model'].get('num_classes'))
+        if self.train_dataset_config:
+            if hasattr(self.train_dataset_class, 'NUM_CLASSES'):
+                num_classes_set.add(self.train_dataset_class.NUM_CLASSES)
+            elif 'num_classes' in self.train_dataset_config:
+                num_classes_set.add(self.train_dataset_config['num_classes'])
+        if self.val_dataset_config:
+            if hasattr(self.val_dataset_class, 'NUM_CLASSES'):
+                num_classes_set.add(self.val_dataset_class.NUM_CLASSES)
+            elif 'num_classes' in self.val_dataset_config:
+                num_classes_set.add(self.val_dataset_config['num_classes'])
+
+        if len(num_classes_set) == 0:
+            raise ValueError(
+                '`num_classes` is not found. Please set it in model, train_dataset or val_dataset'
+            )
+        elif len(num_classes_set) > 1:
+            raise ValueError(
+                '`num_classes` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
+                .format(num_classes_set))
+
+        num_classes = num_classes_set.pop()
+        self.dic['model']['num_classes'] = num_classes
+        if self.train_dataset_config and \
+            (not hasattr(self.train_dataset_class, 'NUM_CLASSES')):
+            self.dic['train_dataset']['num_classes'] = num_classes
+        if self.val_dataset_config and \
+            (not hasattr(self.val_dataset_class, 'NUM_CLASSES')):
+            self.dic['val_dataset']['num_classes'] = num_classes
+
+    def _check_sync_img_channels(self):
+        img_channels_set = set()
+        model_cfg = self.dic['model']
+
+        # If the model has backbone, in_channels is the input params of backbone.
+        # Otherwise, in_channels is the input params of the model.
+        if 'backbone' in model_cfg:
+            x = model_cfg['backbone'].get('in_channels', None)
+            if x is not None:
+                img_channels_set.add(x)
+        elif model_cfg.get('in_channels', None) is not None:
+            img_channels_set.add(model_cfg.get('in_channels'))
+        if self.train_dataset_config and \
+            ('img_channels' in self.train_dataset_config):
+            img_channels_set.add(self.train_dataset_config['img_channels'])
+        if self.val_dataset_config and \
+            ('img_channels' in self.val_dataset_config):
+            img_channels_set.add(self.val_dataset_config['img_channels'])
+
+        if len(img_channels_set) > 1:
+            raise ValueError(
+                '`img_channels` is not consistent: {}. Please set it consistently in model or train_dataset or val_dataset'
+                .format(img_channels_set))
+
+        img_channels = 3 if len(img_channels_set) == 0 \
+            else img_channels_set.pop()
+        if 'backbone' in model_cfg:
+            self.dic['model']['backbone']['in_channels'] = img_channels
+        else:
+            self.dic['model']['in_channels'] = img_channels
+        if self.train_dataset_config and \
+            self.train_dataset_config['type'] == "Dataset":
+            self.dic['train_dataset']['img_channels'] = img_channels
+        if self.val_dataset_config and \
+            self.val_dataset_config['type'] == "Dataset":
+            self.dic['val_dataset']['img_channels'] = img_channels

+ 2 - 2
paddlers/models/ppseg/cvlibs/manager.py

@@ -31,7 +31,7 @@ class ComponentManager:
 
 
     Examples 1:
     Examples 1:
 
 
-        from paddlers.models.ppseg.cvlibs.manager import ComponentManager
+        from paddleseg.cvlibs.manager import ComponentManager
 
 
         model_manager = ComponentManager()
         model_manager = ComponentManager()
 
 
@@ -49,7 +49,7 @@ class ComponentManager:
     Examples 2:
     Examples 2:
 
 
         # Or an easier way, using it as a Python decorator, while just add it above the class declaration.
         # Or an easier way, using it as a Python decorator, while just add it above the class declaration.
-        from paddlers.models.ppseg.cvlibs.manager import ComponentManager
+        from paddleseg.cvlibs.manager import ComponentManager
 
 
         model_manager = ComponentManager()
         model_manager = ComponentManager()
 
 

+ 30 - 4
paddlers/models/ppseg/cvlibs/param_init.py

@@ -24,7 +24,7 @@ def constant_init(param, **kwargs):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.cvlibs import param_init
+        from paddleseg.cvlibs import param_init
         import paddle.nn as nn
         import paddle.nn as nn
 
 
         linear = nn.Linear(2, 4)
         linear = nn.Linear(2, 4)
@@ -46,7 +46,7 @@ def normal_init(param, **kwargs):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.cvlibs import param_init
+        from paddleseg.cvlibs import param_init
         import paddle.nn as nn
         import paddle.nn as nn
 
 
         linear = nn.Linear(2, 4)
         linear = nn.Linear(2, 4)
@@ -79,7 +79,7 @@ def kaiming_normal_init(param, **kwargs):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.cvlibs import param_init
+        from paddleseg.cvlibs import param_init
         import paddle.nn as nn
         import paddle.nn as nn
 
 
         linear = nn.Linear(2, 4)
         linear = nn.Linear(2, 4)
@@ -109,7 +109,7 @@ def kaiming_uniform(param, **kwargs):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.cvlibs import param_init
+        from paddleseg.cvlibs import param_init
         import paddle.nn as nn
         import paddle.nn as nn
 
 
         linear = nn.Linear(2, 4)
         linear = nn.Linear(2, 4)
@@ -118,3 +118,29 @@ def kaiming_uniform(param, **kwargs):
 
 
     initializer = nn.initializer.KaimingUniform(**kwargs)
     initializer = nn.initializer.KaimingUniform(**kwargs)
     initializer(param, param.block)
     initializer(param, param.block)
+
+
+def xavier_uniform(param, **kwargs):
+    r"""
+    This implements the Xavier weight initializer from the paper
+    `Understanding the difficulty of training deep feedforward neural
+    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio.
+    This initializer is designed to keep the scale of the gradients
+    approximately same in all the layers. In case of Uniform distribution,
+    the range is [-x, x], where
+    .. math::
+        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
+    Args:
+        param (Tensor): Tensor that needs to be initialized.
+
+    Examples:
+
+        from paddleseg.cvlibs import param_init
+        import paddle.nn as nn
+
+        linear = nn.Linear(2, 4)
+        param_init.xavier_uniform(linear.weight)
+    """
+    initializer = nn.initializer.XavierUniform(**kwargs)
+    initializer(param, param.block)

+ 1 - 0
paddlers/models/ppseg/datasets/__init__.py

@@ -27,3 +27,4 @@ from .drive import DRIVE
 from .hrf import HRF
 from .hrf import HRF
 from .chase_db1 import CHASEDB1
 from .chase_db1 import CHASEDB1
 from .pp_humanseg14k import PPHumanSeg14K
 from .pp_humanseg14k import PPHumanSeg14K
+from .pssl import PSSLDataset

+ 22 - 14
paddlers/models/ppseg/datasets/ade.py

@@ -17,12 +17,12 @@ import os
 import numpy as np
 import numpy as np
 from PIL import Image
 from PIL import Image
 
 
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-import paddlers.models.ppseg.transforms.functional as F
+from paddleseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+import paddleseg.transforms.functional as F
 
 
 URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
 URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
 
 
@@ -89,23 +89,31 @@ class ADE20K(Dataset):
             self.file_list.append([img_path, label_path])
             self.file_list.append([img_path, label_path])
 
 
     def __getitem__(self, idx):
     def __getitem__(self, idx):
+        data = {}
+        data['trans_info'] = []
         image_path, label_path = self.file_list[idx]
         image_path, label_path = self.file_list[idx]
+        data['img'] = image_path
+        data['gt_fields'] = [
+        ]  # If key in gt_fields, the data[key] have transforms synchronous.
+
         if self.mode == 'val':
         if self.mode == 'val':
-            im, _ = self.transforms(im=image_path)
+            data = self.transforms(data)
             label = np.asarray(Image.open(label_path))
             label = np.asarray(Image.open(label_path))
             # The class 0 is ignored. And it will equal to 255 after
             # The class 0 is ignored. And it will equal to 255 after
             # subtracted 1, because the dtype of label is uint8.
             # subtracted 1, because the dtype of label is uint8.
             label = label - 1
             label = label - 1
             label = label[np.newaxis, :, :]
             label = label[np.newaxis, :, :]
-            return im, label
+            data['label'] = label
+            return data
         else:
         else:
-            im, label = self.transforms(im=image_path, label=label_path)
-            label = label - 1
+            data['label'] = label_path
+            data['gt_fields'].append('label')
+            data = self.transforms(data)
+            data['label'] = data['label'] - 1
             # Recover the ignore pixels adding by transform
             # Recover the ignore pixels adding by transform
-            label[label == 254] = 255
+            data['label'][data['label'] == 254] = 255
             if self.edge:
             if self.edge:
                 edge_mask = F.mask_to_binary_edge(
                 edge_mask = F.mask_to_binary_edge(
                     label, radius=2, num_classes=self.num_classes)
                     label, radius=2, num_classes=self.num_classes)
-                return im, label, edge_mask
-            else:
-                return im, label
+                data['edge'] = edge_mask
+            return data

+ 5 - 5
paddlers/models/ppseg/datasets/chase_db1.py

@@ -14,11 +14,11 @@
 
 
 import os
 import os
 
 
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.datasets import Dataset
 
 
 URL = 'https://bj.bcebos.com/paddleseg/dataset/chase_db1/chase_db1.zip'
 URL = 'https://bj.bcebos.com/paddleseg/dataset/chase_db1/chase_db1.zip'
 
 

+ 3 - 3
paddlers/models/ppseg/datasets/cityscapes.py

@@ -15,9 +15,9 @@
 import os
 import os
 import glob
 import glob
 
 
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.datasets import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 
 
 @manager.DATASETS.add_component
 @manager.DATASETS.add_component

+ 3 - 3
paddlers/models/ppseg/datasets/cocostuff.py

@@ -15,9 +15,9 @@
 import os
 import os
 import glob
 import glob
 
 
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.datasets import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 
 
 @manager.DATASETS.add_component
 @manager.DATASETS.add_component

+ 35 - 27
paddlers/models/ppseg/datasets/dataset.py

@@ -18,9 +18,9 @@ import paddle
 import numpy as np
 import numpy as np
 from PIL import Image
 from PIL import Image
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-import paddlers.models.ppseg.transforms.functional as F
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+import paddleseg.transforms.functional as F
 
 
 
 
 @manager.DATASETS.add_component
 @manager.DATASETS.add_component
@@ -46,10 +46,10 @@ class Dataset(paddle.io.Dataset):
 
 
         Examples:
         Examples:
 
 
-            import paddlers.models.ppseg.transforms as T
-            from paddlers.models.ppseg.datasets import Dataset
+            import paddleseg.transforms as T
+            from paddleseg.datasets import Dataset
 
 
-            transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
+            transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
             dataset_root = 'dataset_root_path'
             dataset_root = 'dataset_root_path'
             train_path = 'train_path'
             train_path = 'train_path'
             num_classes = 2
             num_classes = 2
@@ -62,10 +62,11 @@ class Dataset(paddle.io.Dataset):
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 transforms,
+                 mode,
                  dataset_root,
                  dataset_root,
+                 transforms,
                  num_classes,
                  num_classes,
-                 mode='train',
+                 img_channels=3,
                  train_path=None,
                  train_path=None,
                  val_path=None,
                  val_path=None,
                  test_path=None,
                  test_path=None,
@@ -73,10 +74,11 @@ class Dataset(paddle.io.Dataset):
                  ignore_index=255,
                  ignore_index=255,
                  edge=False):
                  edge=False):
         self.dataset_root = dataset_root
         self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
+        self.transforms = Compose(transforms, img_channels=img_channels)
         self.file_list = list()
         self.file_list = list()
         self.mode = mode.lower()
         self.mode = mode.lower()
         self.num_classes = num_classes
         self.num_classes = num_classes
+        self.img_channels = img_channels
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
         self.edge = edge
         self.edge = edge
 
 
@@ -84,13 +86,18 @@ class Dataset(paddle.io.Dataset):
             raise ValueError(
             raise ValueError(
                 "mode should be 'train', 'val' or 'test', but got {}.".format(
                 "mode should be 'train', 'val' or 'test', but got {}.".format(
                     self.mode))
                     self.mode))
-
-        if self.transforms is None:
-            raise ValueError("`transforms` is necessary, but it is None.")
-
         if not os.path.exists(self.dataset_root):
         if not os.path.exists(self.dataset_root):
             raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
             raise FileNotFoundError('there is not `dataset_root`: {}.'.format(
                 self.dataset_root))
                 self.dataset_root))
+        if self.transforms is None:
+            raise ValueError("`transforms` is necessary, but it is None.")
+        if num_classes < 1:
+            raise ValueError(
+                "`num_classes` should be greater than 1, but got {}".format(
+                    num_classes))
+        if img_channels not in [1, 3]:
+            raise ValueError("`img_channels` should in [1, 3], but got {}".
+                             format(img_channels))
 
 
         if self.mode == 'train':
         if self.mode == 'train':
             if train_path is None:
             if train_path is None:
@@ -139,24 +146,25 @@ class Dataset(paddle.io.Dataset):
                 self.file_list.append([image_path, label_path])
                 self.file_list.append([image_path, label_path])
 
 
     def __getitem__(self, idx):
     def __getitem__(self, idx):
+        data = {}
+        data['trans_info'] = []
         image_path, label_path = self.file_list[idx]
         image_path, label_path = self.file_list[idx]
-        if self.mode == 'test':
-            im, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            return im, image_path
-        elif self.mode == 'val':
-            im, _ = self.transforms(im=image_path)
-            label = np.asarray(Image.open(label_path))
-            label = label[np.newaxis, :, :]
-            return im, label
+        data['img'] = image_path
+        data['label'] = label_path
+        # Keys listed in gt_fields have their data[key] transformed synchronously.
+        data['gt_fields'] = []
+        if self.mode == 'val':
+            data = self.transforms(data)
+            data['label'] = data['label'][np.newaxis, :, :]
+
         else:
         else:
-            im, label = self.transforms(im=image_path, label=label_path)
+            data['gt_fields'].append('label')
+            data = self.transforms(data)
             if self.edge:
             if self.edge:
                 edge_mask = F.mask_to_binary_edge(
                 edge_mask = F.mask_to_binary_edge(
-                    label, radius=2, num_classes=self.num_classes)
-                return im, label, edge_mask
-            else:
-                return im, label
+                    data['label'], radius=2, num_classes=self.num_classes)
+                data['edge'] = edge_mask
+        return data
 
 
     def __len__(self):
     def __len__(self):
         return len(self.file_list)
         return len(self.file_list)

+ 5 - 5
paddlers/models/ppseg/datasets/drive.py

@@ -14,11 +14,11 @@
 
 
 import os
 import os
 
 
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.datasets import Dataset
 
 
 URL = 'https://bj.bcebos.com/paddleseg/dataset/drive/drive.zip'
 URL = 'https://bj.bcebos.com/paddleseg/dataset/drive/drive.zip'
 
 

+ 6 - 6
paddlers/models/ppseg/datasets/eg1800.py

@@ -18,12 +18,12 @@ import copy
 import cv2
 import cv2
 import numpy as np
 import numpy as np
 
 
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-import paddlers.models.ppseg.transforms.functional as F
+from paddleseg.datasets import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+import paddleseg.transforms.functional as F
 
 
 URL = "https://paddleseg.bj.bcebos.com/dataset/EG1800.zip"
 URL = "https://paddleseg.bj.bcebos.com/dataset/EG1800.zip"
 
 

+ 5 - 5
paddlers/models/ppseg/datasets/hrf.py

@@ -14,11 +14,11 @@
 
 
 import os
 import os
 
 
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.datasets import Dataset
 
 
 URL = 'https://bj.bcebos.com/paddleseg/dataset/hrf/hrf.zip'
 URL = 'https://bj.bcebos.com/paddleseg/dataset/hrf/hrf.zip'
 
 

+ 4 - 4
paddlers/models/ppseg/datasets/mini_deep_globe_road_extraction.py

@@ -15,10 +15,10 @@
 import os
 import os
 
 
 from .dataset import Dataset
 from .dataset import Dataset
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 URL = "https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip"
 URL = "https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip"
 
 

+ 4 - 4
paddlers/models/ppseg/datasets/optic_disc_seg.py

@@ -15,10 +15,10 @@
 import os
 import os
 
 
 from .dataset import Dataset
 from .dataset import Dataset
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
 URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
 
 

+ 3 - 3
paddlers/models/ppseg/datasets/pascal_context.py

@@ -15,9 +15,9 @@
 import os
 import os
 
 
 from PIL import Image
 from PIL import Image
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.datasets import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 
 
 @manager.DATASETS.add_component
 @manager.DATASETS.add_component

+ 2 - 2
paddlers/models/ppseg/datasets/pp_humanseg14k.py

@@ -15,8 +15,8 @@
 import os
 import os
 
 
 from .dataset import Dataset
 from .dataset import Dataset
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 
 
 @manager.DATASETS.add_component
 @manager.DATASETS.add_component

+ 135 - 0
paddlers/models/ppseg/datasets/pssl.py

@@ -0,0 +1,135 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+
+from paddleseg.datasets import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class PSSLDataset(Dataset):
+    """
+    The PSSL dataset for segmentation. PSSL is short for Pseudo Semantic Segmentation Labels, where the pseudo label
+    is computed by the Consensus explanation algorithm.
+
+    The PSSL refers to "Distilling Ensemble of Explanations for Weakly-Supervised Pre-Training of Image Segmentation 
+    Models" (https://arxiv.org/abs/2207.03335). 
+    
+    The Consensus explanation refers to "Cross-Model Consensus of Explanations and Beyond for Image Classification 
+    Models: An Empirical Study" (https://arxiv.org/abs/2109.00707).
+
+    To use this dataset, we need to additionally prepare the original ImageNet dataset, which has the folder structure
+    as follows:
+
+        imagenet_root
+        |
+        |--train
+        |  |--n01440764
+        |  |  |--n01440764_10026.JPEG
+        |  |  |--...
+        |  |--nxxxxxxxx
+        |  |--...
+
+    where only the "train" set is needed.
+
+    The PSSL dataset has the folder structure as follows:
+
+        pssl_root
+        |
+        |--train
+        |  |--n01440764
+        |  |  |--n01440764_10026.JPEG_eiseg.npz
+        |  |  |--...
+        |  |--nxxxxxxxx
+        |  |--...
+        |
+        |--imagenet_lsvrc_2015_synsets.txt
+        |--train.txt
+
+    where "train.txt" and "imagenet_lsvrc_2015_synsets.txt" are included in the PSSL dataset.
+
+    Args:
+        transforms (list): Transforms for image.
+        imagenet_root (str): The path to the original ImageNet dataset.
+        pssl_root (str): The path to the PSSL dataset.
+        mode (str, optional): Which part of dataset to use. Only 'train' is supported. Default: 'train'.
+        edge (bool, optional): Whether to compute edge while training. Default: False.
+    """
+    ignore_index = 1001  # 0~999 is target class, 1000 is bg
+    NUM_CLASSES = 1001  # consider target class and bg
+
+    def __init__(self,
+                 transforms,
+                 imagenet_root,
+                 pssl_root,
+                 mode='train',
+                 edge=False):
+        mode = mode.lower()
+        if mode not in ['train']:
+            raise ValueError("mode should be 'train', but got {}.".format(mode))
+        if transforms is None:
+            raise ValueError("`transforms` is necessary, but it is None.")
+
+        self.transforms = Compose(transforms)
+        self.mode = mode
+        self.edge = edge
+
+        self.num_classes = self.NUM_CLASSES
+        self.ignore_index = self.num_classes  # 1001
+        self.file_list = []
+        self.class_id_dict = {}
+
+        if imagenet_root is None or not os.path.isdir(pssl_root):
+            raise ValueError(
+                "The dataset is not Found or the folder structure is nonconfoumance."
+            )
+
+        train_list_file = os.path.join(pssl_root, "train.txt")
+        if not os.path.exists(train_list_file):
+            raise ValueError("Train list file isn't exists.")
+        for idx, line in enumerate(open(train_list_file)):
+            # line: train/n04118776/n04118776_45912.JPEG_eiseg.npz
+            label_path = line.strip()
+            img_path = label_path.split('.JPEG')[0] + '.JPEG'
+            label_path = os.path.join(pssl_root, label_path)
+            img_path = os.path.join(imagenet_root, img_path)
+            self.file_list.append([img_path, label_path])
+
+        # mapping class name to class id.
+        class_id_file = os.path.join(pssl_root,
+                                     "imagenet_lsvrc_2015_synsets.txt")
+        if not os.path.exists(class_id_file):
+            raise ValueError("Class id file isn't exists.")
+        for idx, line in enumerate(open(class_id_file)):
+            class_name = line.strip()
+            self.class_id_dict[class_name] = idx
+
+    def __getitem__(self, idx):
+        image_path, label_path = self.file_list[idx]
+
+        # transform label
+        class_name = (image_path.split('/')[-1]).split('_')[0]
+        class_id = self.class_id_dict[class_name]
+
+        pssl_seg = np.load(label_path)['arr_0']
+        gt_semantic_seg = np.zeros_like(pssl_seg, dtype=np.int64) + 1000
+        # [0, 999] for imagenet classes, 1000 for background, others(-1) will be ignored during training.
+        gt_semantic_seg[pssl_seg == 1] = class_id
+
+        im, label = self.transforms(im=image_path, label=gt_semantic_seg)
+
+        return im, label

+ 5 - 5
paddlers/models/ppseg/datasets/stare.py

@@ -14,11 +14,11 @@
 
 
 import os
 import os
 
 
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.datasets import Dataset
 
 
 URL = 'https://bj.bcebos.com/paddleseg/dataset/stare/stare.zip'
 URL = 'https://bj.bcebos.com/paddleseg/dataset/stare/stare.zip'
 
 

+ 6 - 6
paddlers/models/ppseg/datasets/supervisely.py

@@ -18,12 +18,12 @@ import copy
 import cv2
 import cv2
 import numpy as np
 import numpy as np
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-import paddlers.models.ppseg.transforms.functional as F
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+from paddleseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+import paddleseg.transforms.functional as F
 
 
 URL = "https://paddleseg.bj.bcebos.com/dataset/Supervisely_face.zip"
 URL = "https://paddleseg.bj.bcebos.com/dataset/Supervisely_face.zip"
 
 

+ 5 - 5
paddlers/models/ppseg/datasets/voc.py

@@ -14,11 +14,11 @@
 
 
 import os
 import os
 
 
-from paddlers.models.ppseg.datasets import Dataset
-from paddlers.models.ppseg.utils.download import download_file_and_uncompress
-from paddlers.models.ppseg.utils import seg_env
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.transforms import Compose
+from paddleseg.datasets import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.utils import seg_env
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
 
 
 URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
 URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
 
 

+ 9 - 0
paddlers/models/ppseg/models/__init__.py

@@ -49,9 +49,18 @@ from .segnet import SegNet
 from .encnet import ENCNet
 from .encnet import ENCNet
 from .hrnet_contrast import HRNetW48Contrast
 from .hrnet_contrast import HRNetW48Contrast
 from .espnet import ESPNetV2
 from .espnet import ESPNetV2
+from .pp_liteseg import PPLiteSeg
 from .dmnet import DMNet
 from .dmnet import DMNet
 from .espnetv1 import ESPNetV1
 from .espnetv1 import ESPNetV1
 from .enet import ENet
 from .enet import ENet
 from .bisenetv1 import BiseNetV1
 from .bisenetv1 import BiseNetV1
 from .fastfcn import FastFCN
 from .fastfcn import FastFCN
 from .pfpnnet import PFPNNet
 from .pfpnnet import PFPNNet
+from .glore import GloRe
+from .ddrnet import DDRNet_23
+from .ccnet import CCNet
+from .mobileseg import MobileSeg
+from .upernet import UPerNet
+from .sinet import SINet
+from .lraspp import LRASPP
+from .topformer import TopFormer

+ 3 - 3
paddlers/models/ppseg/models/ann.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 6 - 6
paddlers/models/ppseg/models/attention_unet.py

@@ -14,9 +14,9 @@
 
 
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg import utils
 import numpy as np
 import numpy as np
 
 
 
 
@@ -35,13 +35,13 @@ class AttentionUNet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
     """
     """
 
 
-    def __init__(self, num_classes, pretrained=None):
+    def __init__(self, num_classes, in_channels=3, pretrained=None):
         super().__init__()
         super().__init__()
-        n_channels = 3
-        self.encoder = Encoder(n_channels, [64, 128, 256, 512])
+        self.encoder = Encoder(in_channels, [64, 128, 256, 512])
         filters = np.array([64, 128, 256, 512, 1024])
         filters = np.array([64, 128, 256, 512, 1024])
         self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
         self.up5 = UpConv(ch_in=filters[4], ch_out=filters[3])
         self.att5 = AttentionBlock(
         self.att5 = AttentionBlock(

+ 4 - 0
paddlers/models/ppseg/models/backbones/__init__.py

@@ -21,3 +21,7 @@ from .swin_transformer import *
 from .mobilenetv2 import *
 from .mobilenetv2 import *
 from .mix_transformer import *
 from .mix_transformer import *
 from .stdcnet import *
 from .stdcnet import *
+from .lite_hrnet import *
+from .shufflenetv2 import *
+from .ghostnet import *
+from .top_transformer import *

+ 318 - 0
paddlers/models/ppseg/models/backbones/ghostnet.py

@@ -0,0 +1,318 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
+
+import math
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Uniform, KaimingNormal
+
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils, logger
+
+__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+        bn_name = name + "_bn"
+
+        self._batch_norm = BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale", regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset", regularizer=L2Decay(0.0)),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class SEBlock(nn.Layer):
+    def __init__(self, num_channels, reduction_ratio=4, name=None):
+        super(SEBlock, self).__init__()
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+        self._num_channels = num_channels
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        med_ch = num_channels // reduction_ratio
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
+            bias_attr=ParamAttr(name=name + "_1_offset"))
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_channels,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
+            bias_attr=ParamAttr(name=name + "_2_offset"))
+
+    def forward(self, inputs):
+        pool = self.pool2d_gap(inputs)
+        pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = F.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = paddle.clip(x=excitation, min=0, max=1)
+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = paddle.multiply(inputs, excitation)
+        return out
+
+
+class GhostModule(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 output_channels,
+                 kernel_size=1,
+                 ratio=2,
+                 dw_size=3,
+                 stride=1,
+                 relu=True,
+                 name=None):
+        super(GhostModule, self).__init__()
+        init_channels = int(math.ceil(output_channels / ratio))
+        new_channels = int(init_channels * (ratio - 1))
+        self.primary_conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=init_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            groups=1,
+            act="relu" if relu else None,
+            name=name + "_primary_conv")
+        self.cheap_operation = ConvBNLayer(
+            in_channels=init_channels,
+            out_channels=new_channels,
+            kernel_size=dw_size,
+            stride=1,
+            groups=init_channels,
+            act="relu" if relu else None,
+            name=name + "_cheap_operation")
+
+    def forward(self, inputs):
+        x = self.primary_conv(inputs)
+        y = self.cheap_operation(x)
+        out = paddle.concat([x, y], axis=1)
+        return out
+
+
+class GhostBottleneck(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 hidden_dim,
+                 output_channels,
+                 kernel_size,
+                 stride,
+                 use_se,
+                 name=None):
+        super(GhostBottleneck, self).__init__()
+        self._stride = stride
+        self._use_se = use_se
+        self._num_channels = in_channels
+        self._output_channels = output_channels
+        self.ghost_module_1 = GhostModule(
+            in_channels=in_channels,
+            output_channels=hidden_dim,
+            kernel_size=1,
+            stride=1,
+            relu=True,
+            name=name + "_ghost_module_1")
+        if stride == 2:
+            self.depthwise_conv = ConvBNLayer(
+                in_channels=hidden_dim,
+                out_channels=hidden_dim,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=hidden_dim,
+                act=None,
+                name=name +
+                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+        if use_se:
+            self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
+        self.ghost_module_2 = GhostModule(
+            in_channels=hidden_dim,
+            output_channels=output_channels,
+            kernel_size=1,
+            relu=False,
+            name=name + "_ghost_module_2")
+        if stride != 1 or in_channels != output_channels:
+            self.shortcut_depthwise = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=in_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=in_channels,
+                act=None,
+                name=name +
+                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+            self.shortcut_conv = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=output_channels,
+                kernel_size=1,
+                stride=1,
+                groups=1,
+                act=None,
+                name=name + "_shortcut_conv")
+
+    def forward(self, inputs):
+        x = self.ghost_module_1(inputs)
+        if self._stride == 2:
+            x = self.depthwise_conv(x)
+        if self._use_se:
+            x = self.se_block(x)
+        x = self.ghost_module_2(x)
+        if self._stride == 1 and self._num_channels == self._output_channels:
+            shortcut = inputs
+        else:
+            shortcut = self.shortcut_depthwise(inputs)
+            shortcut = self.shortcut_conv(shortcut)
+        return paddle.add(x=x, y=shortcut)
+
+
+class GhostNet(nn.Layer):
+    def __init__(self, scale, in_channels=3, pretrained=None):
+        super(GhostNet, self).__init__()
+        self.cfgs = [
+            # k, t, c, SE, s
+            [3, 16, 16, 0, 1],
+            [3, 48, 24, 0, 2],
+            [3, 72, 24, 0, 1],  # x4
+            [5, 72, 40, 1, 2],
+            [5, 120, 40, 1, 1],  # x8
+            [3, 240, 80, 0, 2],
+            [3, 200, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 480, 112, 1, 1],
+            [3, 672, 112, 1, 1],  # x16
+            [5, 672, 160, 1, 2],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1]  # x32
+        ]
+        self.scale = scale
+        self.pretrained = pretrained
+
+        output_channels = int(self._make_divisible(16 * self.scale, 4))
+        self.conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=output_channels,
+            kernel_size=3,
+            stride=2,
+            groups=1,
+            act="relu",
+            name="conv1")
+
+        # build inverted residual blocks
+        self.out_index = [2, 4, 10, 15]
+        self.feat_channels = []
+        self.ghost_bottleneck_list = []
+        for idx, (k, exp_size, c, use_se, s) in enumerate(self.cfgs):
+            in_channels = output_channels
+            output_channels = int(self._make_divisible(c * self.scale, 4))
+            hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
+            ghost_bottleneck = self.add_sublayer(
+                name="_ghostbottleneck_" + str(idx),
+                sublayer=GhostBottleneck(
+                    in_channels=in_channels,
+                    hidden_dim=hidden_dim,
+                    output_channels=output_channels,
+                    kernel_size=k,
+                    stride=s,
+                    use_se=use_se,
+                    name="_ghostbottleneck_" + str(idx)))
+            self.ghost_bottleneck_list.append(ghost_bottleneck)
+            if idx in self.out_index:
+                self.feat_channels.append(output_channels)
+
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, inputs):
+        feat_list = []
+        x = self.conv1(inputs)
+        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
+            x = ghost_bottleneck(x)
+            if idx in self.out_index:
+                feat_list.append(x)
+        return feat_list
+
+    def _make_divisible(self, v, divisor, min_value=None):
+        """
+        This function is taken from the original tf repo.
+        It ensures that all layers have a channel number that is divisible by `divisor`.
+        It can be seen here:
+        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+        """
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        # Make sure that round down does not go down by more than 10%.
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x0_5(**kwargs):
+    model = GhostNet(scale=0.5, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x1_0(**kwargs):
+    model = GhostNet(scale=1.0, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def GhostNet_x1_3(**kwargs):
+    model = GhostNet(scale=1.3, **kwargs)
+    return model

+ 6 - 4
paddlers/models/ppseg/models/backbones/hrnet.py

@@ -18,9 +18,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager, param_init
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 __all__ = [
 __all__ = [
     "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
     "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
@@ -37,6 +37,7 @@ class HRNet(nn.Layer):
     (https://arxiv.org/pdf/1908.07919.pdf).
     (https://arxiv.org/pdf/1908.07919.pdf).
 
 
     Args:
     Args:
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path of pretrained model.
         pretrained (str, optional): The path of pretrained model.
         stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
         stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
         stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
         stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
@@ -56,6 +57,7 @@ class HRNet(nn.Layer):
     """
     """
 
 
     def __init__(self,
     def __init__(self,
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  stage1_num_modules=1,
                  stage1_num_modules=1,
                  stage1_num_blocks=(4, ),
                  stage1_num_blocks=(4, ),
@@ -91,7 +93,7 @@ class HRNet(nn.Layer):
         self.feat_channels = [sum(stage4_num_channels)]
         self.feat_channels = [sum(stage4_num_channels)]
 
 
         self.conv_layer1_1 = layers.ConvBNReLU(
         self.conv_layer1_1 = layers.ConvBNReLU(
-            in_channels=3,
+            in_channels=in_channels,
             out_channels=64,
             out_channels=64,
             kernel_size=3,
             kernel_size=3,
             stride=2,
             stride=2,

+ 974 - 0
paddlers/models/ppseg/models/backbones/lite_hrnet.py

@@ -0,0 +1,974 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on
+https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
+"""
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from numbers import Integral
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Normal, Constant
+
+from paddleseg.cvlibs import manager
+from paddleseg import utils
+
+__all__ = [
+    "Lite_HRNet_18", "Lite_HRNet_30", "Lite_HRNet_naive",
+    "Lite_HRNet_wider_naive", "LiteHRNet"
+]
+
+
+def Conv2d(in_channels,
+           out_channels,
+           kernel_size,
+           stride=1,
+           padding=0,
+           dilation=1,
+           groups=1,
+           bias=True,
+           weight_init=Normal(std=0.001),
+           bias_init=Constant(0.)):
+    """
+    Create an `nn.Conv2D` whose parameters use the given initializers.
+
+    Args:
+        in_channels, out_channels, kernel_size, stride, padding, dilation, groups:
+            Forwarded unchanged to `nn.Conv2D`.
+        bias (bool): When False, `bias_attr=False` is passed to `nn.Conv2D`.
+        weight_init: Initializer wrapped into the weight `ParamAttr`.
+        bias_init: Initializer wrapped into the bias `ParamAttr` (used only if `bias`).
+
+    Returns:
+        The constructed `nn.Conv2D` layer.
+    """
+    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
+    bias_attr = paddle.framework.ParamAttr(
+        initializer=bias_init) if bias else False
+    return nn.Conv2D(
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
+        groups=groups,
+        weight_attr=weight_attr,
+        bias_attr=bias_attr)
+
+
+def channel_shuffle(x, groups):
+    """
+    Shuffle axis 1 of `x` by splitting it into `groups` groups and interleaving them.
+
+    `x` is indexed as a 4-D tensor. Axis 1 is read from the static shape, the
+    other three sizes from `paddle.shape(x)`.
+    """
+    runtime_shape = paddle.shape(x)
+    n, h, w = runtime_shape[0], runtime_shape[2], runtime_shape[3]
+    c = x.shape[1]
+
+    grouped = paddle.reshape(x=x, shape=[n, groups, c // groups, h, w])
+    # Swap the "group" and "channel within group" axes, then flatten them back.
+    swapped = paddle.transpose(x=grouped, perm=[0, 2, 1, 3, 4])
+    return paddle.reshape(x=swapped, shape=[n, c, h, w])
+
+
+class ConvNormLayer(nn.Layer):
+    """
+    Convolution followed by an optional normalization layer and activation.
+
+    Args:
+        ch_in (int): Input channels of the convolution.
+        ch_out (int): Output channels of the convolution (and of the norm layer).
+        filter_size (int): Kernel size; the padding is `(filter_size - 1) // 2`.
+        stride (int, optional): Stride of the convolution. Default: 1.
+        groups (int, optional): Groups of the convolution. Default: 1.
+        norm_type (str, optional): One of 'bn', 'sync_bn', 'gn', or None for no
+            normalization. Note that 'sync_bn' builds a plain `nn.BatchNorm2D`
+            here, exactly like 'bn'. Default: None.
+        norm_groups (int, optional): `num_groups` for 'gn'. Default: 32.
+        norm_decay (float, optional): L2 decay coefficient applied to the norm
+            layer's weight and bias. Default: 0.
+        freeze_norm (bool, optional): If True, the norm parameters get learning
+            rate 0 and `stop_gradient = True`, and batch norm is built with
+            `use_global_stats=True`. Default: False.
+        act (str, optional): 'relu' or 'sigmoid'; any other value applies no
+            activation. Default: None.
+    """
+
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 norm_type=None,
+                 norm_groups=32,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 act=None):
+        super(ConvNormLayer, self).__init__()
+        self.act = act
+        # A learning rate of 0 keeps frozen norm parameters from being updated.
+        norm_lr = 0. if freeze_norm else 1.
+        if norm_type is not None:
+            assert norm_type in ['bn', 'sync_bn', 'gn'], \
+                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
+            param_attr = ParamAttr(
+                initializer=Constant(1.0),
+                learning_rate=norm_lr,
+                regularizer=L2Decay(norm_decay), )
+            bias_attr = ParamAttr(
+                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            global_stats = True if freeze_norm else None
+            if norm_type in ['bn', 'sync_bn']:
+                self.norm = nn.BatchNorm2D(
+                    ch_out,
+                    weight_attr=param_attr,
+                    bias_attr=bias_attr,
+                    use_global_stats=global_stats, )
+            elif norm_type == 'gn':
+                self.norm = nn.GroupNorm(
+                    num_groups=norm_groups,
+                    num_channels=ch_out,
+                    weight_attr=param_attr,
+                    bias_attr=bias_attr)
+            norm_params = self.norm.parameters()
+            if freeze_norm:
+                for param in norm_params:
+                    param.stop_gradient = True
+            # The convolution gets no bias when a norm layer follows it.
+            conv_bias_attr = False
+        else:
+            conv_bias_attr = True
+            self.norm = None
+
+        # NOTE: the norm layer above is created before the convolution.
+        self.conv = nn.Conv2D(
+            in_channels=ch_in,
+            out_channels=ch_out,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(initializer=Normal(
+                mean=0., std=0.001)),
+            bias_attr=conv_bias_attr)
+
+    def forward(self, inputs):
+        """Apply the convolution, then the norm layer (if any), then the activation (if any)."""
+        out = self.conv(inputs)
+        if self.norm is not None:
+            out = self.norm(out)
+
+        # Only 'relu' and 'sigmoid' are recognized; anything else is a no-op.
+        if self.act == 'relu':
+            out = F.relu(out)
+        elif self.act == 'sigmoid':
+            out = F.sigmoid(out)
+        return out
+
+
+class DepthWiseSeparableConvNormLayer(nn.Layer):
+    """
+    Depthwise `ConvNormLayer` (groups == `ch_in`) followed by a 1x1 pointwise `ConvNormLayer`.
+
+    Args:
+        ch_in (int): Input channels; also the width of the depthwise stage.
+        ch_out (int): Output channels of the pointwise stage.
+        filter_size (int): Kernel size of the depthwise stage.
+        stride (int, optional): Stride of the depthwise stage. Default: 1.
+        dw_norm_type / pw_norm_type (str, optional): `norm_type` of each stage.
+        norm_decay (float, optional): Passed to both stages. Default: 0.
+        freeze_norm (bool, optional): Passed to both stages. Default: False.
+        dw_act / pw_act (str, optional): `act` of each stage.
+    """
+
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride=1,
+                 dw_norm_type=None,
+                 pw_norm_type=None,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 dw_act=None,
+                 pw_act=None):
+        super().__init__()
+        # Options shared by the depthwise and the pointwise stage.
+        shared = dict(norm_decay=norm_decay, freeze_norm=freeze_norm)
+        self.depthwise_conv = ConvNormLayer(
+            ch_in=ch_in,
+            ch_out=ch_in,
+            filter_size=filter_size,
+            stride=stride,
+            groups=ch_in,
+            norm_type=dw_norm_type,
+            act=dw_act,
+            **shared)
+        self.pointwise_conv = ConvNormLayer(
+            ch_in=ch_in,
+            ch_out=ch_out,
+            filter_size=1,
+            stride=1,
+            norm_type=pw_norm_type,
+            act=pw_act,
+            **shared)
+
+    def forward(self, x):
+        """Run the depthwise stage and feed its output to the pointwise stage."""
+        return self.pointwise_conv(self.depthwise_conv(x))
+
+
+class CrossResolutionWeightingModule(nn.Layer):
+    """
+    Compute per-branch multiplicative weights from all branches jointly.
+
+    Every input but the last is average-pooled with window and stride
+    `2 ** (len(x) - idx - 1)`, everything is concatenated along axis 1 and
+    passed through two 1x1 `ConvNormLayer`s (relu, then sigmoid). The result
+    is split back per branch, resized to each input's spatial size and
+    multiplied with that input.
+
+    Args:
+        channels (list): Channels of each branch; also the split sizes.
+        ratio (int, optional): Channel reduction of the first 1x1 conv. Default: 16.
+        norm_type (str, optional): `norm_type` of both convs. Default: 'bn'.
+        freeze_norm (bool, optional): Passed to both convs. Default: False.
+        norm_decay (float, optional): Passed to both convs. Default: 0.
+    """
+
+    def __init__(self,
+                 channels,
+                 ratio=16,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super().__init__()
+        self.channels = channels
+        width = sum(channels)
+        reduced = width // ratio
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+        self.conv1 = ConvNormLayer(
+            ch_in=width,
+            ch_out=reduced,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+        self.conv2 = ConvNormLayer(
+            ch_in=reduced,
+            ch_out=width,
+            filter_size=1,
+            stride=1,
+            act='sigmoid',
+            **norm_cfg)
+
+    def forward(self, x):
+        """Return the list of inputs, each scaled by its computed weight map."""
+        num_inputs = len(x)
+        pooled = []
+        for idx, feat in enumerate(x[:-1]):
+            window = 2**(num_inputs - idx - 1)
+            pooled.append(
+                F.avg_pool2d(
+                    feat, kernel_size=window, stride=window))
+        # The last input is used as is.
+        pooled.append(x[-1])
+
+        weights = self.conv2(self.conv1(paddle.concat(pooled, 1)))
+        weights = paddle.split(weights, self.channels, 1)
+        scaled = []
+        for feat, weight in zip(x, weights):
+            resized = F.interpolate(
+                weight, paddle.shape(feat)[-2:], mode='nearest')
+            scaled.append(feat * resized)
+        return scaled
+
+
+class SpatialWeightingModule(nn.Layer):
+    """
+    Scale the input by weights computed from its globally pooled features.
+
+    The input goes through `nn.AdaptiveAvgPool2D(1)` and two 1x1
+    `ConvNormLayer`s (relu, then sigmoid; no `norm_type` is passed to them);
+    the result is multiplied with the input.
+
+    Args:
+        in_channel (int): Channels of the input.
+        ratio (int, optional): Channel reduction of the first conv. Default: 16.
+        freeze_norm (bool, optional): Passed to both convs. Default: False.
+        norm_decay (float, optional): Passed to both convs. Default: 0.
+    """
+
+    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
+        super().__init__()
+        reduced = in_channel // ratio
+        shared = dict(freeze_norm=freeze_norm, norm_decay=norm_decay)
+        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
+        self.conv1 = ConvNormLayer(
+            ch_in=in_channel,
+            ch_out=reduced,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **shared)
+        self.conv2 = ConvNormLayer(
+            ch_in=reduced,
+            ch_out=in_channel,
+            filter_size=1,
+            stride=1,
+            act='sigmoid',
+            **shared)
+
+    def forward(self, x):
+        """Return `x` multiplied by the computed weights."""
+        weights = self.conv2(self.conv1(self.global_avgpooling(x)))
+        return x * weights
+
+
+class ConditionalChannelWeightingBlock(nn.Layer):
+    """
+    Multi-branch block that processes one half of every branch's channels.
+
+    Each input is split in two halves along axis 1. The second halves go
+    through cross-resolution weighting, a per-branch depthwise 3x3
+    `ConvNormLayer` and a per-branch `SpatialWeightingModule` (ratio 4). They
+    are then concatenated with the untouched first halves and
+    channel-shuffled with two groups.
+
+    Args:
+        in_channels (list): Channels of each branch.
+        stride (int): Stride of the depthwise convs; must be 1 or 2.
+        reduce_ratio (int): `ratio` of the cross-resolution weighting.
+        norm_type (str, optional): Used by the cross-resolution weighting and
+            the depthwise convs. Default: 'bn'.
+        freeze_norm (bool, optional): Passed to all sublayers. Default: False.
+        norm_decay (float, optional): Passed to all sublayers. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 stride,
+                 reduce_ratio,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super().__init__()
+        assert stride in [1, 2]
+        half_widths = [channel // 2 for channel in in_channels]
+        shared = dict(freeze_norm=freeze_norm, norm_decay=norm_decay)
+
+        self.cross_resolution_weighting = CrossResolutionWeightingModule(
+            half_widths, ratio=reduce_ratio, norm_type=norm_type, **shared)
+
+        depthwise = []
+        for width in half_widths:
+            depthwise.append(
+                ConvNormLayer(
+                    width,
+                    width,
+                    filter_size=3,
+                    stride=stride,
+                    groups=width,
+                    norm_type=norm_type,
+                    **shared))
+        self.depthwise_convs = nn.LayerList(depthwise)
+
+        spatial = []
+        for width in half_widths:
+            spatial.append(SpatialWeightingModule(width, ratio=4, **shared))
+        self.spatial_weighting = nn.LayerList(spatial)
+
+    def forward(self, x):
+        """Return one output per input branch."""
+        halves = [feat.chunk(2, axis=1) for feat in x]
+        kept = [pair[0] for pair in halves]
+        processed = self.cross_resolution_weighting(
+            [pair[1] for pair in halves])
+        processed = [
+            sw(dw(feat))
+            for feat, dw, sw in zip(processed, self.depthwise_convs,
+                                    self.spatial_weighting)
+        ]
+
+        return [
+            channel_shuffle(
+                paddle.concat(
+                    [left, right], axis=1), groups=2)
+            for left, right in zip(kept, processed)
+        ]
+
+
+class ShuffleUnit(nn.Layer):
+    """
+    Two-branch shuffle block.
+
+    With `stride == 1` the input is split in two halves along axis 1; the
+    first half is kept and the second goes through `branch2`. With
+    `stride > 1` the whole input goes through both `branch1` and `branch2`.
+    The two results are concatenated and channel-shuffled with two groups.
+
+    Args:
+        in_channel (int): Channels of the input.
+        out_channel (int): Channels of the output; each branch yields half.
+        stride (int): Stride of the depthwise convs.
+        norm_type (str, optional): `norm_type` of every conv. Default: 'bn'.
+        freeze_norm (bool, optional): Passed to every conv. Default: False.
+        norm_decay (float, optional): Passed to every conv. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channel,
+                 out_channel,
+                 stride,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super().__init__()
+        half = out_channel // 2
+        self.stride = stride
+        if self.stride == 1:
+            assert in_channel == half * 2, \
+                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, half * 2)
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+
+        # branch1 only exists for strided units.
+        if stride > 1:
+            self.branch1 = nn.Sequential(
+                ConvNormLayer(
+                    ch_in=in_channel,
+                    ch_out=in_channel,
+                    filter_size=3,
+                    stride=self.stride,
+                    groups=in_channel,
+                    **norm_cfg),
+                ConvNormLayer(
+                    ch_in=in_channel,
+                    ch_out=half,
+                    filter_size=1,
+                    stride=1,
+                    act='relu',
+                    **norm_cfg), )
+
+        # With stride 1 branch2 only sees the second half of the input.
+        branch2_in = half if stride == 1 else in_channel
+        self.branch2 = nn.Sequential(
+            ConvNormLayer(
+                ch_in=branch2_in,
+                ch_out=half,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=half,
+                ch_out=half,
+                filter_size=3,
+                stride=self.stride,
+                groups=half,
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=half,
+                ch_out=half,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg), )
+
+    def forward(self, x):
+        """Return the concatenated, channel-shuffled branch outputs."""
+        if self.stride > 1:
+            left = self.branch1(x)
+            right = self.branch2(x)
+        else:
+            left, right = x.chunk(2, axis=1)
+            right = self.branch2(right)
+        merged = paddle.concat([left, right], axis=1)
+        return channel_shuffle(merged, groups=2)
+
+
+class IterativeHead(nn.Layer):
+    """
+    Head that merges the branch outputs iteratively, starting from the last one.
+
+    The inputs are processed in reversed order. Each one (after the first) is
+    summed with the bilinearly resized previous result and then projected by
+    a `DepthWiseSeparableConvNormLayer`. All projections but the final one
+    map to the channel count of the next input; the final one keeps its
+    channel count.
+
+    Args:
+        in_channels (list): Channels of each input, in input order.
+        norm_type (str, optional): Norm type of the projections. Default: 'bn'.
+        freeze_norm (bool, optional): Passed to the projections. Default: False.
+        norm_decay (float, optional): Passed to the projections. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super().__init__()
+        count = len(in_channels)
+        final = count - 1
+        self.in_channels = in_channels[::-1]
+
+        stages = []
+        for i in range(count):
+            source = self.in_channels[i]
+            target = source if i == final else self.in_channels[i + 1]
+            stages.append(
+                DepthWiseSeparableConvNormLayer(
+                    ch_in=source,
+                    ch_out=target,
+                    filter_size=3,
+                    stride=1,
+                    dw_act=None,
+                    pw_act='relu',
+                    dw_norm_type=norm_type,
+                    pw_norm_type=norm_type,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        self.projects = nn.LayerList(stages)
+
+    def forward(self, x):
+        """Return the projected features, in the same order as the inputs."""
+        results = []
+        previous = None
+        for i, feat in enumerate(x[::-1]):
+            if previous is not None:
+                resized = F.interpolate(
+                    previous,
+                    size=paddle.shape(feat)[-2:],
+                    mode='bilinear',
+                    align_corners=True)
+                feat = feat + resized
+            previous = self.projects[i](feat)
+            results.append(previous)
+
+        results.reverse()
+        return results
+
+
+class Stem(nn.Layer):
+    """
+    Network stem: a strided 3x3 conv followed by a strided two-branch block.
+
+    `conv1` (stride 2) maps the input to `stem_channel` channels. Its output
+    is split in two halves along axis 1: the first goes through `branch1`
+    (strided depthwise 3x3, then 1x1), the second through `expand_conv`,
+    the strided `depthwise_conv` and `linear_conv`. Both results are
+    concatenated and channel-shuffled with two groups.
+
+    Args:
+        in_channel (int): Channels of the input.
+        stem_channel (int): Output channels of `conv1`.
+        out_channel (int): Channels of the block output.
+        expand_ratio (float): `round(stem_channel * expand_ratio)` is the width
+            of the expanded second branch.
+        norm_type (str, optional): `norm_type` of every conv. Default: 'bn'.
+        freeze_norm (bool, optional): Passed to every conv. Default: False.
+        norm_decay (float, optional): Passed to every conv. Default: 0.
+    """
+
+    def __init__(self,
+                 in_channel,
+                 stem_channel,
+                 out_channel,
+                 expand_ratio,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super().__init__()
+        norm_cfg = dict(
+            norm_type=norm_type, freeze_norm=freeze_norm, norm_decay=norm_decay)
+        self.conv1 = ConvNormLayer(
+            in_channel,
+            stem_channel,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            **norm_cfg)
+
+        half = stem_channel // 2
+        expanded = int(round(stem_channel * expand_ratio))
+        same_width = stem_channel == out_channel
+        # Widths of the two branch outputs; together they give `out_channel`.
+        second_out = half if same_width else stem_channel
+        first_out = out_channel - second_out
+
+        self.branch1 = nn.Sequential(
+            ConvNormLayer(
+                ch_in=half,
+                ch_out=half,
+                filter_size=3,
+                stride=2,
+                groups=half,
+                **norm_cfg),
+            ConvNormLayer(
+                ch_in=half,
+                ch_out=first_out,
+                filter_size=1,
+                stride=1,
+                act='relu',
+                **norm_cfg), )
+        self.expand_conv = ConvNormLayer(
+            ch_in=half,
+            ch_out=expanded,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+        self.depthwise_conv = ConvNormLayer(
+            ch_in=expanded,
+            ch_out=expanded,
+            filter_size=3,
+            stride=2,
+            groups=expanded,
+            **norm_cfg)
+        self.linear_conv = ConvNormLayer(
+            ch_in=expanded,
+            ch_out=second_out,
+            filter_size=1,
+            stride=1,
+            act='relu',
+            **norm_cfg)
+
+    def forward(self, x):
+        """Return the channel-shuffled concatenation of both branch outputs."""
+        first, second = self.conv1(x).chunk(2, axis=1)
+        first = self.branch1(first)
+        second = self.linear_conv(self.depthwise_conv(self.expand_conv(second)))
+        merged = paddle.concat([first, second], axis=1)
+        return channel_shuffle(merged, groups=2)
+
+
+class LiteHRNetModule(nn.Layer):
+    """
+    One multi-branch module: per-branch blocks followed by optional cross-branch fusion.
+
+    Args:
+        num_branches (int): Number of branches; must equal `len(in_channels)`.
+        num_blocks (int): Number of blocks stacked in the module.
+        in_channels (list): Channels of each branch.
+        reduce_ratio (int): Reduction ratio handed to the 'LITE' blocks.
+        module_type (str): 'LITE' (ConditionalChannelWeightingBlock) or
+            'NAIVE' (ShuffleUnit per branch).
+        multiscale_output (bool, optional): If False, only one fused output
+            (for branch 0) is produced. Default: False.
+        with_fuse (bool, optional): Whether to build and apply the fuse layers.
+            Default: True.
+        norm_type (str, optional): NOTE: currently ignored; `self.norm_type` is
+            always set to 'bn' and the fuse layers use `nn.BatchNorm2D` directly.
+        freeze_norm (bool, optional): Passed to the blocks. Default: False.
+        norm_decay (float, optional): Passed to the blocks. Default: 0.
+    """
+
+    def __init__(self,
+                 num_branches,
+                 num_blocks,
+                 in_channels,
+                 reduce_ratio,
+                 module_type,
+                 multiscale_output=False,
+                 with_fuse=True,
+                 norm_type='bn',
+                 freeze_norm=False,
+                 norm_decay=0.):
+        super(LiteHRNetModule, self).__init__()
+        assert num_branches == len(in_channels),\
+            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
+        assert module_type in [
+            'LITE', 'NAIVE'
+        ], "module_type should be one of ['LITE', 'NAIVE']"
+        self.num_branches = num_branches
+        self.in_channels = in_channels
+        self.multiscale_output = multiscale_output
+        self.with_fuse = with_fuse
+        # NOTE(review): the `norm_type` argument is not used; 'bn' is hard-coded.
+        self.norm_type = 'bn'
+        self.module_type = module_type
+
+        if self.module_type == 'LITE':
+            self.layers = self._make_weighting_blocks(
+                num_blocks,
+                reduce_ratio,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+        elif self.module_type == 'NAIVE':
+            self.layers = self._make_naive_branches(
+                num_branches,
+                num_blocks,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+
+        # `fuse_layers` and `relu` only exist when `with_fuse` is True.
+        if self.with_fuse:
+            self.fuse_layers = self._make_fuse_layers(
+                freeze_norm=freeze_norm, norm_decay=norm_decay)
+            self.relu = nn.ReLU()
+
+    def _make_weighting_blocks(self,
+                               num_blocks,
+                               reduce_ratio,
+                               stride=1,
+                               freeze_norm=False,
+                               norm_decay=0.):
+        """Stack `num_blocks` ConditionalChannelWeightingBlocks; each one handles all branches."""
+        layers = []
+        for i in range(num_blocks):
+            layers.append(
+                ConditionalChannelWeightingBlock(
+                    self.in_channels,
+                    stride=stride,
+                    reduce_ratio=reduce_ratio,
+                    norm_type=self.norm_type,
+                    freeze_norm=freeze_norm,
+                    norm_decay=norm_decay))
+        return nn.Sequential(*layers)
+
+    def _make_naive_branches(self,
+                             num_branches,
+                             num_blocks,
+                             freeze_norm=False,
+                             norm_decay=0.):
+        """Build one Sequential of `num_blocks` stride-1 ShuffleUnits per branch."""
+        branches = []
+        for branch_idx in range(num_branches):
+            layers = []
+            for i in range(num_blocks):
+                layers.append(
+                    ShuffleUnit(
+                        self.in_channels[branch_idx],
+                        self.in_channels[branch_idx],
+                        stride=1,
+                        norm_type=self.norm_type,
+                        freeze_norm=freeze_norm,
+                        norm_decay=norm_decay))
+            branches.append(nn.Sequential(*layers))
+        return nn.LayerList(branches)
+
+    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
+        """
+        Build the layers that map branch `j` to the shape of output branch `i`.
+
+        Returns None for a single branch. Otherwise `fuse_layers[i][j]` is:
+        a 1x1 conv + BN + nearest upsampling by `2**(j - i)` when `j > i`;
+        None when `j == i`; a chain of `i - j` stride-2 depthwise/pointwise
+        stages when `j < i`. `freeze_norm` and `norm_decay` are accepted but
+        not used here.
+        """
+        if self.num_branches == 1:
+            return None
+        fuse_layers = []
+        num_out_branches = self.num_branches if self.multiscale_output else 1
+        for i in range(num_out_branches):
+            fuse_layer = []
+            for j in range(self.num_branches):
+                if j > i:
+                    fuse_layer.append(
+                        nn.Sequential(
+                            Conv2d(
+                                self.in_channels[j],
+                                self.in_channels[i],
+                                kernel_size=1,
+                                stride=1,
+                                padding=0,
+                                bias=False, ),
+                            nn.BatchNorm2D(self.in_channels[i]),
+                            nn.Upsample(
+                                scale_factor=2**(j - i), mode='nearest')))
+                elif j == i:
+                    fuse_layer.append(None)
+                else:
+                    conv_downsamples = []
+                    for k in range(i - j):
+                        # Only the last downsampling stage changes the channel
+                        # count to that of branch `i`, and it has no ReLU.
+                        if k == i - j - 1:
+                            conv_downsamples.append(
+                                nn.Sequential(
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=3,
+                                        stride=2,
+                                        padding=1,
+                                        groups=self.in_channels[j],
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[i],
+                                        kernel_size=1,
+                                        stride=1,
+                                        padding=0,
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[i])))
+                        else:
+                            conv_downsamples.append(
+                                nn.Sequential(
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=3,
+                                        stride=2,
+                                        padding=1,
+                                        groups=self.in_channels[j],
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    Conv2d(
+                                        self.in_channels[j],
+                                        self.in_channels[j],
+                                        kernel_size=1,
+                                        stride=1,
+                                        padding=0,
+                                        bias=False, ),
+                                    nn.BatchNorm2D(self.in_channels[j]),
+                                    nn.ReLU()))
+
+                    fuse_layer.append(nn.Sequential(*conv_downsamples))
+            fuse_layers.append(nn.LayerList(fuse_layer))
+
+        return nn.LayerList(fuse_layers)
+
+    def forward(self, x):
+        """
+        Run the per-branch blocks and, if enabled, fuse the branches.
+
+        Args:
+            x (list): One entry per branch. For 'NAIVE' modules the list is
+                modified in place.
+
+        Returns:
+            list: The fused outputs when `with_fuse` is True; otherwise the
+            branch outputs (only the first one if `multiscale_output` is False).
+        """
+        if self.num_branches == 1:
+            return [self.layers[0](x[0])]
+        if self.module_type == 'LITE':
+            out = self.layers(x)
+        elif self.module_type == 'NAIVE':
+            for i in range(self.num_branches):
+                x[i] = self.layers[i](x[i])
+            out = x
+        if self.with_fuse:
+            out_fuse = []
+            for i in range(len(self.fuse_layers)):
+                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
+                for j in range(self.num_branches):
+                    # For j == 0 the running sum is doubled rather than adding
+                    # a fresh term. NOTE(review): presumably this mirrors the
+                    # accumulation of the reference implementation; confirm
+                    # before changing, the statement order matters here.
+                    if j == 0:
+                        y += y
+                    elif i == j:
+                        y += out[j]
+                    else:
+                        y += self.fuse_layers[i][j](out[j])
+                    # While fusing output 0, out[0] is rebound to the running
+                    # sum, so later outputs (i > 0) start from the fused value.
+                    if i == 0:
+                        out[i] = y
+                out_fuse.append(self.relu(y))
+            out = out_fuse
+        elif not self.multiscale_output:
+            out = [out[0]]
+        return out
+
+
+class LiteHRNet(nn.Layer):
+    """
+    @inproceedings{Yulitehrnet21,
+    title={Lite-HRNet: A Lightweight High-Resolution Network},
+        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+        booktitle={CVPR},year={2021}
+    }
+
+    Args:
+        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
+            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
+            "wider_naive": Naive network with wider channels in each block.
+            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
+            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        freeze_at (int): the stage to freeze
+        freeze_norm (bool): whether to freeze norm in HRNet
+        norm_decay (float): weight decay for normalization layer weights
+        return_idx (List): the stage to return
+    """
+
+    def __init__(self,
+                 network_type,
+                 in_channels=3,
+                 freeze_at=0,
+                 freeze_norm=True,
+                 norm_decay=0.,
+                 return_idx=None,
+                 use_head=False,
+                 pretrained=None):
+        """
+        Build the stem, the three transition/stage pairs and the optional head.
+
+        Args:
+            network_type (str): One of "lite_18", "lite_30", "naive", "wider_naive".
+            in_channels (int, optional): The channels of input image. Default: 3.
+            freeze_at (int, optional): Stored on the instance. Default: 0.
+            freeze_norm (bool, optional): Passed to the transitions, stages and head.
+                Default: True.
+            norm_decay (float, optional): Passed to the transitions, stages and head.
+                Default: 0.
+            return_idx (int|list, optional): The stage(s) to return. A single int is
+                wrapped in a list. Default: None, which means [0, 1, 2, 3].
+            use_head (bool, optional): Whether to add an `IterativeHead`. Default: False.
+            pretrained (str, optional): Handed to `utils.load_entire_model` by
+                `init_weight` when not None. Default: None.
+        """
+        super(LiteHRNet, self).__init__()
+        # A fresh list per instance: a list literal as the default value would
+        # be shared by every instance that relies on the default.
+        if return_idx is None:
+            return_idx = [0, 1, 2, 3]
+        elif isinstance(return_idx, Integral):
+            return_idx = [return_idx]
+        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
+            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
+        assert len(return_idx) > 0, "need one or more return index"
+        self.freeze_at = freeze_at
+        self.freeze_norm = freeze_norm
+        self.norm_decay = norm_decay
+        self.return_idx = return_idx
+        self.norm_type = 'bn'
+        self.use_head = use_head
+        self.pretrained = pretrained
+
+        # Per-variant configuration of the three stages.
+        self.module_configs = {
+            "lite_18": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["LITE", "LITE", "LITE"],
+                "reduce_ratios": [8, 8, 8],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+            "lite_30": {
+                "num_modules": [3, 8, 3],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["LITE", "LITE", "LITE"],
+                "reduce_ratios": [8, 8, 8],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+            "naive": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
+                "reduce_ratios": [1, 1, 1],
+                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
+            },
+            "wider_naive": {
+                "num_modules": [2, 4, 2],
+                "num_branches": [2, 3, 4],
+                "num_blocks": [2, 2, 2],
+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
+                "reduce_ratios": [1, 1, 1],
+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
+            },
+        }
+
+        self.stages_config = self.module_configs[network_type]
+
+        self.stem = Stem(in_channels, 32, 32, 1)
+        num_channels_pre_layer = [32]
+        # Registers transition0..2 and stage0..2 as sublayers, in this order.
+        for stage_idx in range(3):
+            num_channels = self.stages_config["num_channels"][stage_idx]
+            setattr(self, 'transition{}'.format(stage_idx),
+                    self._make_transition_layer(num_channels_pre_layer,
+                                                num_channels, self.freeze_norm,
+                                                self.norm_decay))
+            stage, num_channels_pre_layer = self._make_stage(
+                self.stages_config, stage_idx, num_channels, True,
+                self.freeze_norm, self.norm_decay)
+            setattr(self, 'stage{}'.format(stage_idx), stage)
+
+        num_channels = self.stages_config["num_channels"][-1]
+        self.feat_channels = num_channels
+
+        if self.use_head:
+            self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
+                                            self.freeze_norm, self.norm_decay)
+
+            # With the head, the first entry keeps its width and every other
+            # entry is reported as half of the last stage's channel count.
+            self.feat_channels = [num_channels[0]]
+            for i in range(1, len(num_channels)):
+                self.feat_channels.append(num_channels[i] // 2)
+
+        self.init_weight()
+
+    def init_weight(self):
+        """Load the checkpoint at ``self.pretrained`` into this layer, if one was given."""
+        if self.pretrained is None:
+            return
+        utils.load_entire_model(self, self.pretrained)
+
+    def _make_transition_layer(self,
+                               num_channels_pre_layer,
+                               num_channels_cur_layer,
+                               freeze_norm=False,
+                               norm_decay=0.):
+        """
+        Build the per-branch adapters placed between two consecutive stages.
+
+        Args:
+            num_channels_pre_layer (list[int]): Channel count of every branch
+                coming out of the previous stage.
+            num_channels_cur_layer (list[int]): Channel count of every branch
+                expected by the current stage.
+            freeze_norm (bool, optional): Accepted but not used by this method.
+                Default: False.
+            norm_decay (float, optional): Accepted but not used by this method.
+                Default: 0.
+
+        Returns:
+            nn.LayerList: One entry per current branch. An entry is None when
+                an existing branch already has the expected channel count.
+        """
+        pre_branch_count = len(num_channels_pre_layer)
+        adapters = []
+        for i, cur_channels in enumerate(num_channels_cur_layer):
+            if i >= pre_branch_count:
+                # Branch that did not exist before: derive it from the last
+                # previous branch with one stride-2 step per missing level.
+                last_channels = num_channels_pre_layer[-1]
+                step_count = i + 1 - pre_branch_count
+                steps = []
+                for j in range(step_count):
+                    # Only the final step switches to the target channel count.
+                    if j == step_count - 1:
+                        out_channels = cur_channels
+                    else:
+                        out_channels = last_channels
+                    steps.append(
+                        nn.Sequential(
+                            Conv2d(
+                                last_channels,
+                                last_channels,
+                                groups=last_channels,
+                                kernel_size=3,
+                                stride=2,
+                                padding=1,
+                                bias=False),
+                            nn.BatchNorm2D(last_channels),
+                            Conv2d(
+                                last_channels,
+                                out_channels,
+                                kernel_size=1,
+                                stride=1,
+                                padding=0,
+                                bias=False),
+                            nn.BatchNorm2D(out_channels),
+                            nn.ReLU()))
+                adapters.append(nn.Sequential(*steps))
+                continue
+
+            pre_channels = num_channels_pre_layer[i]
+            if cur_channels == pre_channels:
+                # Same width on both sides: nothing to adapt.
+                adapters.append(None)
+                continue
+
+            # Existing branch with a different width: depthwise 3x3 followed
+            # by a 1x1 convolution that changes the channel count.
+            adapters.append(
+                nn.Sequential(
+                    Conv2d(
+                        pre_channels,
+                        pre_channels,
+                        kernel_size=3,
+                        stride=1,
+                        padding=1,
+                        groups=pre_channels,
+                        bias=False),
+                    nn.BatchNorm2D(pre_channels),
+                    Conv2d(
+                        pre_channels,
+                        cur_channels,
+                        kernel_size=1,
+                        stride=1,
+                        padding=0,
+                        bias=False),
+                    nn.BatchNorm2D(cur_channels),
+                    nn.ReLU()))
+        return nn.LayerList(adapters)
+
+    def _make_stage(self,
+                    stages_config,
+                    stage_idx,
+                    in_channels,
+                    multiscale_output,
+                    freeze_norm=False,
+                    norm_decay=0.):
+        """
+        Assemble the LiteHRNetModule sequence of one stage.
+
+        Args:
+            stages_config (dict): Per-stage lists keyed by "num_modules",
+                "num_branches", "num_blocks", "reduce_ratios" and "module_type".
+            stage_idx (int): Index used to pick this stage's entry in each list.
+            in_channels (list[int]): Channel counts fed to the first module.
+            multiscale_output (bool): When falsy, the last module of the stage
+                is built with ``multiscale_output=False``.
+            freeze_norm (bool, optional): Forwarded to every module. Default: False.
+            norm_decay (float, optional): Forwarded to every module. Default: 0.
+
+        Returns:
+            tuple: ``(nn.Sequential of the modules, in_channels reported by the
+                last module)``.
+        """
+        module_count = stages_config["num_modules"][stage_idx]
+        branch_count = stages_config["num_branches"][stage_idx]
+        block_count = stages_config["num_blocks"][stage_idx]
+        ratio = stages_config['reduce_ratios'][stage_idx]
+        kind = stages_config['module_type'][stage_idx]
+
+        built = []
+        for idx in range(module_count):
+            is_last = idx == module_count - 1
+            module = LiteHRNetModule(
+                branch_count,
+                block_count,
+                in_channels,
+                ratio,
+                kind,
+                # Only the final module may drop the multi-scale output.
+                multiscale_output=not (is_last and not multiscale_output),
+                with_fuse=True,
+                freeze_norm=freeze_norm,
+                norm_decay=norm_decay)
+            built.append(module)
+            # The next module consumes what this one reports as its channels.
+            in_channels = module.in_channels
+        return nn.Sequential(*built), in_channels
+
+    def forward(self, x):
+        """
+        Run the stem and the three stages, then select the requested outputs.
+
+        Args:
+            x: Input passed to ``self.stem``.
+
+        Returns:
+            list: Outputs of the last stage (or of the head when
+                ``self.use_head`` is set) whose position is in ``self.return_idx``.
+        """
+        feats = [self.stem(x)]
+
+        for stage_idx in range(3):
+            transition = getattr(self, 'transition{}'.format(stage_idx))
+            branch_count = self.stages_config["num_branches"][stage_idx]
+            stage_inputs = []
+            for j in range(branch_count):
+                adapter = transition[j]
+                if adapter is None:
+                    # No adapter for this branch: pass it through unchanged.
+                    stage_inputs.append(feats[j])
+                    continue
+                # A branch the previous stage did not produce is derived from
+                # the last branch that does exist.
+                source = feats[j] if j < len(feats) else feats[-1]
+                stage_inputs.append(adapter(source))
+            stage = getattr(self, 'stage{}'.format(stage_idx))
+            feats = stage(stage_inputs)
+
+        if self.use_head:
+            feats = self.head_layer(feats)
+
+        selected = []
+        for idx, feat in enumerate(feats):
+            if idx == self.freeze_at:
+                # Stop gradients at the output whose index equals freeze_at.
+                feat.stop_gradient = True
+            if idx in self.return_idx:
+                selected.append(feat)
+        return selected
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_18(**kwargs):
+    """Build a LiteHRNet with the "lite_18" configuration; kwargs are forwarded to LiteHRNet."""
+    return LiteHRNet(network_type="lite_18", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_30(**kwargs):
+    """Build a LiteHRNet with the "lite_30" configuration; kwargs are forwarded to LiteHRNet."""
+    return LiteHRNet(network_type="lite_30", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_naive(**kwargs):
+    """Build a LiteHRNet with the "naive" configuration; kwargs are forwarded to LiteHRNet."""
+    return LiteHRNet(network_type="naive", **kwargs)
+
+
+@manager.BACKBONES.add_component
+def Lite_HRNet_wider_naive(**kwargs):
+    """Build a LiteHRNet with the "wider_naive" configuration; kwargs are forwarded to LiteHRNet."""
+    return LiteHRNet(network_type="wider_naive", **kwargs)

+ 6 - 6
paddlers/models/ppseg/models/backbones/mix_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -20,9 +20,9 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 import paddle.nn.initializer as paddle_init
 import paddle.nn.initializer as paddle_init
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.models.backbones.transformer_utils import *
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.models.backbones.transformer_utils import *
 
 
 
 
 class Mlp(nn.Layer):
 class Mlp(nn.Layer):
@@ -260,7 +260,7 @@ class MixVisionTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  img_size=224,
                  img_size=224,
                  patch_size=16,
                  patch_size=16,
-                 in_chans=3,
+                 in_channels=3,
                  num_classes=1000,
                  num_classes=1000,
                  embed_dims=[64, 128, 256, 512],
                  embed_dims=[64, 128, 256, 512],
                  num_heads=[1, 2, 4, 8],
                  num_heads=[1, 2, 4, 8],
@@ -284,7 +284,7 @@ class MixVisionTransformer(nn.Layer):
             img_size=img_size,
             img_size=img_size,
             patch_size=7,
             patch_size=7,
             stride=4,
             stride=4,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dims[0])
             embed_dim=embed_dims[0])
         self.patch_embed2 = OverlapPatchEmbed(
         self.patch_embed2 = OverlapPatchEmbed(
             img_size=img_size // 4,
             img_size=img_size // 4,

+ 217 - 116
paddlers/models/ppseg/models/backbones/mobilenetv2.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -12,13 +12,26 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
+import paddle
+from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg import utils
+from paddleseg.cvlibs import manager
+from paddleseg import utils
+
+__all__ = [
+    "MobileNetV2_x0_25",
+    "MobileNetV2_x0_5",
+    "MobileNetV2_x0_75",
+    "MobileNetV2_x1_0",
+    "MobileNetV2_x1_5",
+    "MobileNetV2_x2_0",
+]
 
 
 
 
-@manager.BACKBONES.add_component
 class MobileNetV2(nn.Layer):
 class MobileNetV2(nn.Layer):
     """
     """
         The MobileNetV2 implementation based on PaddlePaddle.
         The MobileNetV2 implementation based on PaddlePaddle.
@@ -29,69 +42,70 @@ class MobileNetV2(nn.Layer):
         (https://arxiv.org/abs/1801.04381).
         (https://arxiv.org/abs/1801.04381).
 
 
         Args:
         Args:
-            channel_ratio (float, optional): The ratio of channel. Default: 1.0
-            min_channel (int, optional): The minimum of channel. Default: 16
+            scale (float, optional): The scale of channel. Default: 1.0
+            in_channels (int, optional): The channels of input image. Default: 3.
             pretrained (str, optional): The path or url of pretrained model. Default: None
             pretrained (str, optional): The path or url of pretrained model. Default: None
         """
         """
 
 
-    def __init__(self, channel_ratio=1.0, min_channel=16, pretrained=None):
-        super(MobileNetV2, self).__init__()
-        self.channel_ratio = channel_ratio
-        self.min_channel = min_channel
+    def __init__(self, scale=1.0, in_channels=3, pretrained=None):
+        super().__init__()
+        self.scale = scale
         self.pretrained = pretrained
         self.pretrained = pretrained
+        prefix_name = ""
 
 
-        self.stage0 = conv_bn(3, self.depth(32), 3, 2)
-
-        self.stage1 = InvertedResidual(self.depth(32), self.depth(16), 1, 1)
-
-        self.stage2 = nn.Sequential(
-            InvertedResidual(self.depth(16), self.depth(24), 2, 6),
-            InvertedResidual(self.depth(24), self.depth(24), 1, 6), )
-
-        self.stage3 = nn.Sequential(
-            InvertedResidual(self.depth(24), self.depth(32), 2, 6),
-            InvertedResidual(self.depth(32), self.depth(32), 1, 6),
-            InvertedResidual(self.depth(32), self.depth(32), 1, 6), )
+        bottleneck_params_list = [
+            (1, 16, 1, 1),
+            (6, 24, 2, 2),  # x4
+            (6, 32, 3, 2),  # x8
+            (6, 64, 4, 2),
+            (6, 96, 3, 1),  # x16
+            (6, 160, 3, 2),
+            (6, 320, 1, 1),  # x32
+        ]
+        self.out_index = [1, 2, 4, 6]
 
 
-        self.stage4 = nn.Sequential(
-            InvertedResidual(self.depth(32), self.depth(64), 2, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6),
-            InvertedResidual(self.depth(64), self.depth(64), 1, 6), )
+        self.conv1 = ConvBNLayer(
+            num_channels=in_channels,
+            num_filters=int(32 * scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            name=prefix_name + "conv1_1")
 
 
-        self.stage5 = nn.Sequential(
-            InvertedResidual(self.depth(64), self.depth(96), 1, 6),
-            InvertedResidual(self.depth(96), self.depth(96), 1, 6),
-            InvertedResidual(self.depth(96), self.depth(96), 1, 6), )
+        self.block_list = []
+        i = 1
+        in_c = int(32 * scale)
+        for layer_setting in bottleneck_params_list:
+            t, c, n, s = layer_setting
+            i += 1
+            block = self.add_sublayer(
+                prefix_name + "conv" + str(i),
+                sublayer=InvresiBlocks(
+                    in_c=in_c,
+                    t=t,
+                    c=int(c * scale),
+                    n=n,
+                    s=s,
+                    name=prefix_name + "conv" + str(i)))
+            self.block_list.append(block)
+            in_c = int(c * scale)
 
 
-        self.stage6 = nn.Sequential(
-            InvertedResidual(self.depth(96), self.depth(160), 2, 6),
-            InvertedResidual(self.depth(160), self.depth(160), 1, 6),
-            InvertedResidual(self.depth(160), self.depth(160), 1, 6), )
-
-        self.stage7 = InvertedResidual(self.depth(160), self.depth(320), 1, 6)
+        out_channels = [
+            bottleneck_params_list[idx][1] for idx in self.out_index
+        ]
+        self.feat_channels = [int(c * scale) for c in out_channels]
 
 
         self.init_weight()
         self.init_weight()
 
 
-    def depth(self, channels):
-        min_channel = min(channels, self.min_channel)
-        return max(min_channel, int(channels * self.channel_ratio))
-
-    def forward(self, x):
+    def forward(self, inputs):
         feat_list = []
         feat_list = []
 
 
-        feature_1_2 = self.stage0(x)
-        feature_1_2 = self.stage1(feature_1_2)
-        feature_1_4 = self.stage2(feature_1_2)
-        feature_1_8 = self.stage3(feature_1_4)
-        feature_1_16 = self.stage4(feature_1_8)
-        feature_1_16 = self.stage5(feature_1_16)
-        feature_1_32 = self.stage6(feature_1_16)
-        feature_1_32 = self.stage7(feature_1_32)
-        feat_list.append(feature_1_4)
-        feat_list.append(feature_1_8)
-        feat_list.append(feature_1_16)
-        feat_list.append(feature_1_32)
+        y = self.conv1(inputs, if_act=True)
+        for idx, block in enumerate(self.block_list):
+            y = block(y)
+            if idx in self.out_index:
+                feat_list.append(y)
+
         return feat_list
         return feat_list
 
 
     def init_weight(self):
     def init_weight(self):
@@ -99,66 +113,153 @@ class MobileNetV2(nn.Layer):
             utils.load_entire_model(self, self.pretrained)
             utils.load_entire_model(self, self.pretrained)
 
 
 
 
-def conv_bn(inp, oup, kernel, stride):
-    return nn.Sequential(
-        nn.Conv2D(
-            in_channels=inp,
-            out_channels=oup,
-            kernel_size=kernel,
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D followed by BatchNorm, with an optional ReLU6 in forward.
+
+    Args:
+        num_channels: Forwarded to Conv2D as ``in_channels``.
+        filter_size: Forwarded to Conv2D as ``kernel_size``.
+        num_filters: Forwarded to Conv2D as ``out_channels`` and used as the
+            BatchNorm channel count.
+        stride: Forwarded to Conv2D as ``stride``.
+        padding: Forwarded to Conv2D as ``padding``.
+        channels (optional): Accepted but not used. Default: None.
+        num_groups (int, optional): Forwarded to Conv2D as ``groups``. Default: 1.
+        name (str, optional): Prefix of the explicit parameter names. When
+            None, no explicit names are set. Default: None.
+        use_cudnn (bool, optional): Accepted but not used. Default: True.
+    """
+
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 name=None,
+                 use_cudnn=True):
+        super(ConvBNLayer, self).__init__()
+
+        def _named(suffix):
+            # `name` defaults to None; concatenating it with a suffix would
+            # raise TypeError, so fall back to an unnamed attribute instead.
+            return None if name is None else name + suffix
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(name=_named("_weights")),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(name=_named("_bn_scale")),
+            bias_attr=ParamAttr(name=_named("_bn_offset")),
+            moving_mean_name=_named("_bn_mean"),
+            moving_variance_name=_named("_bn_variance"))
+
+    def forward(self, inputs, if_act=True):
+        """Apply conv and batch norm; apply ReLU6 afterwards when ``if_act`` is truthy."""
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        if if_act:
+            y = F.relu6(y)
+        return y
+
+
+class InvertedResidualUnit(nn.Layer):
+    """
+    Three ConvBNLayer steps: a 1x1 expansion, a grouped convolution with one
+    group per expanded channel, and a 1x1 projection without activation.
+
+    The expanded width is ``int(round(num_in_filter * expansion_factor))``.
+    ``forward`` adds the input to the result when ``ifshortcut`` is truthy.
+    """
+
+    def __init__(self, num_channels, num_in_filter, num_filters, stride,
+                 filter_size, padding, expansion_factor, name):
+        super(InvertedResidualUnit, self).__init__()
+        num_expfilter = int(round(num_in_filter * expansion_factor))
+        self._expand_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_expfilter,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_expand")
+
+        self._bottleneck_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_expfilter,
+            filter_size=filter_size,
             stride=stride,
             stride=stride,
-            padding=(kernel - 1) // 2,
-            bias_attr=False),
-        nn.BatchNorm2D(
-            num_features=oup, epsilon=1e-05, momentum=0.1),
-        nn.ReLU())
-
-
-class InvertedResidual(nn.Layer):
-    def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
-        super(InvertedResidual, self).__init__()
-        self.stride = stride
-        assert stride in [1, 2]
-        self.use_res_connect = self.stride == 1 and inp == oup
-
-        self.conv = nn.Sequential(
-            nn.Conv2D(
-                inp,
-                inp * expand_ratio,
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                dilation=1,
-                groups=1,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
-            nn.ReLU(),
-            nn.Conv2D(
-                inp * expand_ratio,
-                inp * expand_ratio,
-                kernel_size=3,
-                stride=stride,
-                padding=dilation,
-                dilation=dilation,
-                groups=inp * expand_ratio,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=inp * expand_ratio, epsilon=1e-05, momentum=0.1),
-            nn.ReLU(),
-            nn.Conv2D(
-                inp * expand_ratio,
-                oup,
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                dilation=1,
-                groups=1,
-                bias_attr=False),
-            nn.BatchNorm2D(
-                num_features=oup, epsilon=1e-05, momentum=0.1), )
-
-    def forward(self, x):
-        if self.use_res_connect:
-            return x + self.conv(x)
-        else:
-            return self.conv(x)
+            padding=padding,
+            num_groups=num_expfilter,
+            use_cudnn=False,
+            name=name + "_dwise")
+
+        self._linear_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_filters,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_linear")
+
+    def forward(self, inputs, ifshortcut):
+        # Expansion and grouped steps run with activation; the projection does not.
+        y = self._expand_conv(inputs, if_act=True)
+        y = self._bottleneck_conv(y, if_act=True)
+        y = self._linear_conv(y, if_act=False)
+        if ifshortcut:
+            # Residual connection; the caller decides when it is applied.
+            y = paddle.add(inputs, y)
+        return y
+
+
+class InvresiBlocks(nn.Layer):
+    """
+    A group of ``n`` InvertedResidualUnit layers.
+
+    The first unit maps ``in_c`` to ``c`` channels with stride ``s`` and runs
+    without a shortcut. The remaining ``n - 1`` units keep ``c`` channels, use
+    stride 1 and run with the shortcut enabled.
+
+    Args:
+        in_c: Input channels of the first unit.
+        t: Expansion factor given to every unit.
+        c: Output channels of every unit.
+        n: Total number of units in the group.
+        s: Stride of the first unit.
+        name (str): Prefix used for the unit names ("<name>_1", "<name>_2", ...).
+    """
+
+    def __init__(self, in_c, t, c, n, s, name):
+        super(InvresiBlocks, self).__init__()
+
+        self._first_block = InvertedResidualUnit(
+            num_channels=in_c,
+            num_in_filter=in_c,
+            num_filters=c,
+            stride=s,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t,
+            name=name + "_1")
+
+        self._block_list = []
+        for unit_no in range(2, n + 1):
+            unit_name = name + "_" + str(unit_no)
+            unit = InvertedResidualUnit(
+                num_channels=c,
+                num_in_filter=c,
+                num_filters=c,
+                stride=1,
+                filter_size=3,
+                padding=1,
+                expansion_factor=t,
+                name=unit_name)
+            # add_sublayer registers the unit on this layer and returns it.
+            self._block_list.append(self.add_sublayer(unit_name, sublayer=unit))
+
+    def forward(self, inputs):
+        """Run the first unit without shortcut, then every other unit with it."""
+        out = self._first_block(inputs, ifshortcut=False)
+        for unit in self._block_list:
+            out = unit(out, ifshortcut=True)
+        return out
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_25(**kwargs):
+    """Build a MobileNetV2 with scale=0.25; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=0.25, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_5(**kwargs):
+    """Build a MobileNetV2 with scale=0.5; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=0.5, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x0_75(**kwargs):
+    """Build a MobileNetV2 with scale=0.75; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=0.75, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x1_0(**kwargs):
+    """Build a MobileNetV2 with scale=1.0; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=1.0, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x1_5(**kwargs):
+    """Build a MobileNetV2 with scale=1.5; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=1.5, **kwargs)
+
+
+@manager.BACKBONES.add_component
+def MobileNetV2_x2_0(**kwargs):
+    """Build a MobileNetV2 with scale=2.0; kwargs are forwarded to MobileNetV2."""
+    return MobileNetV2(scale=2.0, **kwargs)

+ 317 - 183
paddlers/models/ppseg/models/backbones/mobilenetv3.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -14,11 +14,13 @@
 
 
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
-import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils, logger
+from paddleseg.models import layers
 
 
 __all__ = [
 __all__ = [
     "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
     "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
@@ -28,8 +30,92 @@ __all__ = [
     "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
     "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
 ]
 ]
 
 
-
-def make_divisible(v, divisor=8, min_value=None):
+# Per-family list of "blocks[i]" pattern strings. For "MobileNetV3_small" the indices equal
+# OUT_INDEX["small"]; for "MobileNetV3_large" they are OUT_INDEX["large"] plus a leading 0.
+# NOTE(review): how these strings are consumed is not visible here - presumably they are
+# passed as the `stages_pattern` constructor argument; confirm against the callers.
+MODEL_STAGES_PATTERN = {
+    "MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
+    "MobileNetV3_large":
+    ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
+}
+
+# "large" and "small" are the configs for MobileNetV3_large and MobileNetV3_small respectively.
+# "large_os8" and "small_os8" are variants in which the later rows use stride 1 and carry a
+# dilation rate.
+# Each config is a list. Each element (itself a list) describes one depthwise block and is
+# composed of k, exp, c, se, act, s and, optionally, d.
+# k: kernel_size
+# exp: middle channel number in depthwise block
+# c: output channel number in depthwise block
+# se: whether to use SE block
+# act: which activation to use
+# s: stride in depthwise block
+# d: dilation rate in depthwise block (only present on some rows of the "*_os8" configs)
+NET_CONFIG = {
+    "large": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, False, "relu", 1],
+        [3, 64, 24, False, "relu", 2],
+        [3, 72, 24, False, "relu", 1],  # x4
+        [5, 72, 40, True, "relu", 2],
+        [5, 120, 40, True, "relu", 1],
+        [5, 120, 40, True, "relu", 1],  # x8
+        [3, 240, 80, False, "hardswish", 2],
+        [3, 200, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 480, 112, True, "hardswish", 1],
+        [3, 672, 112, True, "hardswish", 1],  # x16
+        [5, 672, 160, True, "hardswish", 2],
+        [5, 960, 160, True, "hardswish", 1],
+        [5, 960, 160, True, "hardswish", 1],  # x32
+    ],
+    "small": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, True, "relu", 2],
+        [3, 72, 24, False, "relu", 2],
+        [3, 88, 24, False, "relu", 1],
+        [5, 96, 40, True, "hardswish", 2],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 120, 48, True, "hardswish", 1],
+        [5, 144, 48, True, "hardswish", 1],
+        [5, 288, 96, True, "hardswish", 2],
+        [5, 576, 96, True, "hardswish", 1],
+        [5, 576, 96, True, "hardswish", 1],
+    ],
+    "large_os8": [
+        # k, exp, c, se, act, s, {d}
+        [3, 16, 16, False, "relu", 1],
+        [3, 64, 24, False, "relu", 2],
+        [3, 72, 24, False, "relu", 1],  # x4
+        [5, 72, 40, True, "relu", 2],
+        [5, 120, 40, True, "relu", 1],
+        [5, 120, 40, True, "relu", 1],  # x8
+        [3, 240, 80, False, "hardswish", 1],
+        [3, 200, 80, False, "hardswish", 1, 2],
+        [3, 184, 80, False, "hardswish", 1, 2],
+        [3, 184, 80, False, "hardswish", 1, 2],
+        [3, 480, 112, True, "hardswish", 1, 2],
+        [3, 672, 112, True, "hardswish", 1, 2],
+        [5, 672, 160, True, "hardswish", 1, 2],
+        [5, 960, 160, True, "hardswish", 1, 4],
+        [5, 960, 160, True, "hardswish", 1, 4],
+    ],
+    "small_os8": [
+        # k, exp, c, se, act, s, {d}
+        [3, 16, 16, True, "relu", 2],
+        [3, 72, 24, False, "relu", 2],
+        [3, 88, 24, False, "relu", 1],
+        [5, 96, 40, True, "hardswish", 1],
+        [5, 240, 40, True, "hardswish", 1, 2],
+        [5, 240, 40, True, "hardswish", 1, 2],
+        [5, 120, 48, True, "hardswish", 1, 2],
+        [5, 144, 48, True, "hardswish", 1, 2],
+        [5, 288, 96, True, "hardswish", 1, 2],
+        [5, 576, 96, True, "hardswish", 1, 4],
+        [5, 576, 96, True, "hardswish", 1, 4],
+    ]
+}
+
+# Row indices into the NET_CONFIG lists. For "large" these are the rows annotated x4, x8, x16
+# and x32 in NET_CONFIG["large"]. NOTE(review): presumably the blocks whose outputs are
+# returned as features - the consuming code is not visible here; confirm.
+OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}
+
+
+def _make_divisible(v, divisor=8, min_value=None):
     if min_value is None:
     if min_value is None:
         min_value = divisor
         min_value = divisor
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -38,156 +124,113 @@ def make_divisible(v, divisor=8, min_value=None):
     return new_v
     return new_v
 
 
 
 
-class MobileNetV3(nn.Layer):
-    """
-    The MobileNetV3 implementation based on PaddlePaddle.
+def _create_act(act):
+    if act == "hardswish":
+        return nn.Hardswish()
+    elif act == "relu":
+        return nn.ReLU()
+    elif act is None:
+        return None
+    else:
+        raise RuntimeError(
+            "The activation function is not supported: {}".format(act))
 
 
-    The original article refers to Jingdong
-    Andrew Howard, et, al. "Searching for MobileNetV3"
-    (https://arxiv.org/pdf/1905.02244.pdf).
 
 
+class MobileNetV3(nn.Layer):
+    """
+    MobileNetV3
     Args:
     Args:
-        pretrained (str, optional): The path of pretrained model.
-        scale (float, optional): The scale of channels . Default: 1.0.
-        model_name (str, optional): Model name. It determines the type of MobileNetV3. The value is 'small' or 'large'. Defualt: 'small'.
-        output_stride (int, optional): The stride of output features compared to input images. The value should be one of (2, 4, 8, 16, 32). Default: None.
-
+        config: list. MobileNetV3 depthwise blocks config.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        scale: float=1.0. The coefficient that controls the size of network parameters. 
+    Returns:
+        model: nn.Layer. Specific MobileNetV3 model depends on args.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 pretrained=None,
+                 config,
+                 stages_pattern,
+                 out_index,
+                 in_channels=3,
                  scale=1.0,
                  scale=1.0,
-                 model_name="small",
-                 output_stride=None):
-        super(MobileNetV3, self).__init__()
+                 pretrained=None):
+        super().__init__()
 
 
+        self.cfg = config
+        self.out_index = out_index
+        self.scale = scale
+        self.pretrained = pretrained
         inplanes = 16
         inplanes = 16
-        if model_name == "large":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, "relu", 1],
-                [3, 64, 24, False, "relu", 2],
-                [3, 72, 24, False, "relu", 1],  # output 1 -> out_index=2
-                [5, 72, 40, True, "relu", 2],
-                [5, 120, 40, True, "relu", 1],
-                [5, 120, 40, True, "relu", 1],  # output 2 -> out_index=5
-                [3, 240, 80, False, "hard_swish", 2],
-                [3, 200, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 480, 112, True, "hard_swish", 1],
-                [3, 672, 112, True, "hard_swish",
-                 1],  # output 3 -> out_index=11
-                [5, 672, 160, True, "hard_swish", 2],
-                [5, 960, 160, True, "hard_swish", 1],
-                [5, 960, 160, True, "hard_swish",
-                 1],  # output 3 -> out_index=14
-            ]
-            self.out_indices = [2, 5, 11, 14]
-            self.feat_channels = [
-                make_divisible(i * scale) for i in [24, 40, 112, 160]
-            ]
-
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
-        elif model_name == "small":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, "relu", 2],  # output 1 -> out_index=0
-                [3, 72, 24, False, "relu", 2],
-                [3, 88, 24, False, "relu", 1],  # output 2 -> out_index=3
-                [5, 96, 40, True, "hard_swish", 2],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 120, 48, True, "hard_swish", 1],
-                [5, 144, 48, True, "hard_swish", 1],  # output 3 -> out_index=7
-                [5, 288, 96, True, "hard_swish", 2],
-                [5, 576, 96, True, "hard_swish", 1],
-                [5, 576, 96, True, "hard_swish", 1],  # output 4 -> out_index=10
-            ]
-            self.out_indices = [0, 3, 7, 10]
-            self.feat_channels = [
-                make_divisible(i * scale) for i in [16, 24, 48, 96]
-            ]
-
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
-        else:
-            raise NotImplementedError(
-                "mode[{}_model] is not implemented!".format(model_name))
-
-        ###################################################
-        # modify stride and dilation based on output_stride
-        self.dilation_cfg = [1] * len(self.cfg)
-        self.modify_bottle_params(output_stride=output_stride)
-        ###################################################
-
-        self.conv1 = ConvBNLayer(
-            in_c=3,
-            out_c=make_divisible(inplanes * scale),
+
+        self.conv = ConvBNLayer(
+            in_c=in_channels,
+            out_c=_make_divisible(inplanes * self.scale),
             filter_size=3,
             filter_size=3,
             stride=2,
             stride=2,
             padding=1,
             padding=1,
             num_groups=1,
             num_groups=1,
             if_act=True,
             if_act=True,
-            act="hard_swish")
-
-        self.block_list = []
-
-        inplanes = make_divisible(inplanes * scale)
-        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
-            ######################################
-            # add dilation rate
-            dilation_rate = self.dilation_cfg[i]
-            ######################################
-            self.block_list.append(
-                ResidualUnit(
-                    in_c=inplanes,
-                    mid_c=make_divisible(scale * exp),
-                    out_c=make_divisible(scale * c),
-                    filter_size=k,
-                    stride=s,
-                    dilation=dilation_rate,
-                    use_se=se,
-                    act=nl,
-                    name="conv" + str(i + 2)))
-            self.add_sublayer(
-                sublayer=self.block_list[-1], name="conv" + str(i + 2))
-            inplanes = make_divisible(scale * c)
-
-        self.pretrained = pretrained
+            act="hardswish")
+        self.blocks = nn.Sequential(*[
+            ResidualUnit(
+                in_c=_make_divisible(inplanes * self.scale if i == 0 else
+                                     self.cfg[i - 1][2] * self.scale),
+                mid_c=_make_divisible(self.scale * exp),
+                out_c=_make_divisible(self.scale * c),
+                filter_size=k,
+                stride=s,
+                use_se=se,
+                act=act,
+                dilation=td[0] if td else 1)
+            for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
+        ])
+
+        out_channels = [config[idx][2] for idx in self.out_index]
+        self.feat_channels = [
+            _make_divisible(self.scale * c) for c in out_channels
+        ]
+
+        self.init_res(stages_pattern)
         self.init_weight()
         self.init_weight()
 
 
-    def modify_bottle_params(self, output_stride=None):
-
-        if output_stride is not None and output_stride % 2 != 0:
-            raise ValueError("output stride must to be even number")
-        if output_stride is not None:
-            stride = 2
-            rate = 1
-            for i, _cfg in enumerate(self.cfg):
-                stride = stride * _cfg[-1]
-                if stride > output_stride:
-                    rate = rate * _cfg[-1]
-                    self.cfg[i][-1] = 1
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def init_res(self, stages_pattern, return_patterns=None,
+                 return_stages=None):
+        if return_patterns and return_stages:
+            msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
+            logger.warning(msg)
+            return_stages = None
+
+        if return_stages is True:
+            return_patterns = stages_pattern
+        # return_stages is int or bool
+        if type(return_stages) is int:
+            return_stages = [return_stages]
+        if isinstance(return_stages, list):
+            if max(return_stages) > len(stages_pattern) or min(
+                    return_stages) < 0:
+                msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
+                logger.warning(msg)
+                return_stages = [
+                    val for val in return_stages
+                    if val >= 0 and val < len(stages_pattern)
+                ]
+            return_patterns = [stages_pattern[i] for i in return_stages]
 
 
-                self.dilation_cfg[i] = rate
+    def forward(self, x):
+        x = self.conv(x)
 
 
-    def forward(self, inputs, label=None):
-        x = self.conv1(inputs)
-        # A feature list saves each downsampling feature.
         feat_list = []
         feat_list = []
-        for i, block in enumerate(self.block_list):
+        for idx, block in enumerate(self.blocks):
             x = block(x)
             x = block(x)
-            if i in self.out_indices:
+            if idx in self.out_index:
                 feat_list.append(x)
                 feat_list.append(x)
 
 
         return feat_list
         return feat_list
 
 
-    def init_weight(self):
-        if self.pretrained is not None:
-            utils.load_pretrained_model(self, self.pretrained)
-
 
 
 class ConvBNLayer(nn.Layer):
 class ConvBNLayer(nn.Layer):
     def __init__(self,
     def __init__(self,
@@ -196,36 +239,34 @@ class ConvBNLayer(nn.Layer):
                  filter_size,
                  filter_size,
                  stride,
                  stride,
                  padding,
                  padding,
-                 dilation=1,
                  num_groups=1,
                  num_groups=1,
                  if_act=True,
                  if_act=True,
-                 act=None):
-        super(ConvBNLayer, self).__init__()
-        self.if_act = if_act
-        self.act = act
+                 act=None,
+                 dilation=1):
+        super().__init__()
 
 
-        self.conv = nn.Conv2D(
+        self.conv = Conv2D(
             in_channels=in_c,
             in_channels=in_c,
             out_channels=out_c,
             out_channels=out_c,
             kernel_size=filter_size,
             kernel_size=filter_size,
             stride=stride,
             stride=stride,
             padding=padding,
             padding=padding,
-            dilation=dilation,
             groups=num_groups,
             groups=num_groups,
-            bias_attr=False)
-        self.bn = layers.SyncBatchNorm(
-            num_features=out_c,
-            weight_attr=paddle.ParamAttr(
-                regularizer=paddle.regularizer.L2Decay(0.0)),
-            bias_attr=paddle.ParamAttr(
-                regularizer=paddle.regularizer.L2Decay(0.0)))
-        self._act_op = layers.Activation(act='hardswish')
+            bias_attr=False,
+            dilation=dilation)
+        self.bn = BatchNorm(
+            num_channels=out_c,
+            act=None,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.if_act = if_act
+        self.act = _create_act(act)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self.conv(x)
         x = self.conv(x)
         x = self.bn(x)
         x = self.bn(x)
         if self.if_act:
         if self.if_act:
-            x = self._act_op(x)
+            x = self.act(x)
         return x
         return x
 
 
 
 
@@ -237,10 +278,9 @@ class ResidualUnit(nn.Layer):
                  filter_size,
                  filter_size,
                  stride,
                  stride,
                  use_se,
                  use_se,
-                 dilation=1,
                  act=None,
                  act=None,
-                 name=''):
-        super(ResidualUnit, self).__init__()
+                 dilation=1):
+        super().__init__()
         self.if_shortcut = stride == 1 and in_c == out_c
         self.if_shortcut = stride == 1 and in_c == out_c
         self.if_se = use_se
         self.if_se = use_se
 
 
@@ -252,19 +292,18 @@ class ResidualUnit(nn.Layer):
             padding=0,
             padding=0,
             if_act=True,
             if_act=True,
             act=act)
             act=act)
-
         self.bottleneck_conv = ConvBNLayer(
         self.bottleneck_conv = ConvBNLayer(
             in_c=mid_c,
             in_c=mid_c,
             out_c=mid_c,
             out_c=mid_c,
             filter_size=filter_size,
             filter_size=filter_size,
             stride=stride,
             stride=stride,
-            padding='same',
-            dilation=dilation,
+            padding=int((filter_size - 1) // 2) * dilation,
             num_groups=mid_c,
             num_groups=mid_c,
             if_act=True,
             if_act=True,
-            act=act)
+            act=act,
+            dilation=dilation)
         if self.if_se:
         if self.if_se:
-            self.mid_se = SEModule(mid_c, name=name + "_se")
+            self.mid_se = SEModule(mid_c)
         self.linear_conv = ConvBNLayer(
         self.linear_conv = ConvBNLayer(
             in_c=mid_c,
             in_c=mid_c,
             out_c=out_c,
             out_c=out_c,
@@ -273,92 +312,187 @@ class ResidualUnit(nn.Layer):
             padding=0,
             padding=0,
             if_act=False,
             if_act=False,
             act=None)
             act=None)
-        self.dilation = dilation
 
 
-    def forward(self, inputs):
-        x = self.expand_conv(inputs)
+    def forward(self, x):
+        identity = x
+        x = self.expand_conv(x)
         x = self.bottleneck_conv(x)
         x = self.bottleneck_conv(x)
         if self.if_se:
         if self.if_se:
             x = self.mid_se(x)
             x = self.mid_se(x)
         x = self.linear_conv(x)
         x = self.linear_conv(x)
         if self.if_shortcut:
         if self.if_shortcut:
-            x = inputs + x
+            x = paddle.add(identity, x)
         return x
         return x
 
 
 
 
+# nn.Hardsigmoid can't transfer "slope" and "offset" in nn.functional.hardsigmoid
+class Hardsigmoid(nn.Layer):
+    def __init__(self, slope=0.2, offset=0.5):
+        super().__init__()
+        self.slope = slope
+        self.offset = offset
+
+    def forward(self, x):
+        return nn.functional.hardsigmoid(
+            x, slope=self.slope, offset=self.offset)
+
+
 class SEModule(nn.Layer):
 class SEModule(nn.Layer):
-    def __init__(self, channel, reduction=4, name=""):
-        super(SEModule, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2D(1)
-        self.conv1 = nn.Conv2D(
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
             in_channels=channel,
             in_channels=channel,
             out_channels=channel // reduction,
             out_channels=channel // reduction,
             kernel_size=1,
             kernel_size=1,
             stride=1,
             stride=1,
             padding=0)
             padding=0)
-        self.conv2 = nn.Conv2D(
+        self.relu = nn.ReLU()
+        self.conv2 = Conv2D(
             in_channels=channel // reduction,
             in_channels=channel // reduction,
             out_channels=channel,
             out_channels=channel,
             kernel_size=1,
             kernel_size=1,
             stride=1,
             stride=1,
             padding=0)
             padding=0)
+        self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
 
 
-    def forward(self, inputs):
-        outputs = self.avg_pool(inputs)
-        outputs = self.conv1(outputs)
-        outputs = F.relu(outputs)
-        outputs = self.conv2(outputs)
-        outputs = F.hardsigmoid(outputs)
-        return paddle.multiply(x=inputs, y=outputs)
+    def forward(self, x):
+        identity = x
+        x = self.avg_pool(x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.hardsigmoid(x)
+        return paddle.multiply(x=identity, y=x)
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_35(**kwargs):
 def MobileNetV3_small_x0_35(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_5(**kwargs):
 def MobileNetV3_small_x0_5(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x0_75(**kwargs):
 def MobileNetV3_small_x0_75(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
 @manager.BACKBONES.add_component
 @manager.BACKBONES.add_component
 def MobileNetV3_small_x1_0(**kwargs):
 def MobileNetV3_small_x1_0(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_small_x1_25(**kwargs):
 def MobileNetV3_small_x1_25(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_35(**kwargs):
 def MobileNetV3_large_x0_35(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_5(**kwargs):
 def MobileNetV3_large_x0_5(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x0_75(**kwargs):
 def MobileNetV3_large_x0_75(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
 @manager.BACKBONES.add_component
 @manager.BACKBONES.add_component
 def MobileNetV3_large_x1_0(**kwargs):
 def MobileNetV3_large_x1_0(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
     return model
     return model
 
 
 
 
+@manager.BACKBONES.add_component
 def MobileNetV3_large_x1_25(**kwargs):
 def MobileNetV3_large_x1_25(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def MobileNetV3_large_x1_0_os8(**kwargs):
+    model = MobileNetV3(
+        config=NET_CONFIG["large_os8"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        out_index=OUT_INDEX["large"],
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def MobileNetV3_small_x1_0_os8(**kwargs):
+    model = MobileNetV3(
+        config=NET_CONFIG["small_os8"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        out_index=OUT_INDEX["small"],
+        **kwargs)
     return model
     return model

+ 7 - 6
paddlers/models/ppseg/models/backbones/resnet_vd.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 __all__ = [
 __all__ = [
     "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
     "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
@@ -206,15 +206,16 @@ class ResNet_vd(nn.Layer):
         layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
         layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
         output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
         output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
         multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
         multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path of pretrained model.
         pretrained (str, optional): The path of pretrained model.
 
 
     """
     """
 
 
     def __init__(self,
     def __init__(self,
-                 input_channel=3,
                  layers=50,
                  layers=50,
                  output_stride=8,
                  output_stride=8,
                  multi_grid=(1, 1, 1),
                  multi_grid=(1, 1, 1),
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  data_format='NCHW'):
                  data_format='NCHW'):
         super(ResNet_vd, self).__init__()
         super(ResNet_vd, self).__init__()
@@ -252,7 +253,7 @@ class ResNet_vd(nn.Layer):
             dilation_dict = {3: 2}
             dilation_dict = {3: 2}
 
 
         self.conv1_1 = ConvBNLayer(
         self.conv1_1 = ConvBNLayer(
-            in_channels=input_channel,
+            in_channels=in_channels,
             out_channels=32,
             out_channels=32,
             kernel_size=3,
             kernel_size=3,
             stride=2,
             stride=2,

+ 315 - 0
paddlers/models/ppseg/models/backbones/shufflenetv2.py

@@ -0,0 +1,315 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr, reshape, transpose, concat, split
+from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
+from paddle.nn.initializer import KaimingNormal
+from paddle.nn.functional import swish
+
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils, logger
+
+__all__ = [
+    'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5',
+    'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0',
+    'ShuffleNetV2_swish'
+]
+
+
+def channel_shuffle(x, groups):
+    x_shape = paddle.shape(x)
+    batch_size, height, width = x_shape[0], x_shape[2], x_shape[3]
+    num_channels = x.shape[1]
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = reshape(
+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
+
+    # transpose
+    x = transpose(x=x, perm=[0, 2, 1, 3, 4])
+
+    # flatten
+    x = reshape(x=x, shape=[batch_size, num_channels, height, width])
+
+    return x
+
+
+class ConvBNLayer(Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            groups=1,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            out_channels,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            act=act,
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class InvertedResidual(Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu",
+                 name=None):
+        super(InvertedResidual, self).__init__()
+        self._conv_pw = ConvBNLayer(
+            in_channels=in_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
+        x1, x2 = split(
+            inputs,
+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
+            axis=1)
+        x2 = self._conv_pw(x2)
+        x2 = self._conv_dw(x2)
+        x2 = self._conv_linear(x2)
+        out = concat([x1, x2], axis=1)
+        return channel_shuffle(out, 2)
+
+
+class InvertedResidualDS(Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu",
+                 name=None):
+        super(InvertedResidualDS, self).__init__()
+
+        # branch1
+        self._conv_dw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=in_channels,
+            act=None,
+            name='stage_' + name + '_conv4')
+        self._conv_linear_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv5')
+        # branch2
+        self._conv_pw_2 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
+        x1 = self._conv_dw_1(inputs)
+        x1 = self._conv_linear_1(x1)
+        x2 = self._conv_pw_2(inputs)
+        x2 = self._conv_dw_2(x2)
+        x2 = self._conv_linear_2(x2)
+        out = concat([x1, x2], axis=1)
+
+        return channel_shuffle(out, 2)
+
+
+class ShuffleNet(Layer):
+    def __init__(self, scale=1.0, act="relu", in_channels=3, pretrained=None):
+        super(ShuffleNet, self).__init__()
+        self.scale = scale
+        self.pretrained = pretrained
+        stage_repeats = [4, 8, 4]
+
+        if scale == 0.25:
+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
+        elif scale == 0.33:
+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
+        elif scale == 0.5:
+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif scale == 1.0:
+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif scale == 1.5:
+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif scale == 2.0:
+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+        else:
+            raise NotImplementedError("This scale size:[" + str(scale) +
+                                      "] is not implemented!")
+
+        self.out_index = [3, 11, 15]
+        self.feat_channels = stage_out_channels[1:5]
+
+        # 1. conv1
+        self._conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=stage_out_channels[1],
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            act=act,
+            name='stage1_conv')
+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # 2. bottleneck sequences
+        self._block_list = []
+        for stage_id, num_repeat in enumerate(stage_repeats):
+            for i in range(num_repeat):
+                if i == 0:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidualDS(
+                            in_channels=stage_out_channels[stage_id + 1],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=2,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                else:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidual(
+                            in_channels=stage_out_channels[stage_id + 2],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=1,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                self._block_list.append(block)
+
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, inputs):
+        feat_list = []
+
+        y = self._conv1(inputs)
+        y = self._max_pool(y)
+        feat_list.append(y)
+
+        for idx, inv in enumerate(self._block_list):
+            y = inv(y)
+            if idx in self.out_index:
+                feat_list.append(y)
+        return feat_list
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_25(**kwargs):
+    model = ShuffleNet(scale=0.25, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_33(**kwargs):
+    model = ShuffleNet(scale=0.33, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x0_5(**kwargs):
+    model = ShuffleNet(scale=0.5, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x1_0(**kwargs):
+    model = ShuffleNet(scale=1.0, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x1_5(**kwargs):
+    model = ShuffleNet(scale=1.5, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_x2_0(**kwargs):
+    model = ShuffleNet(scale=2.0, **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def ShuffleNetV2_swish(**kwargs):
+    model = ShuffleNet(scale=1.0, act="swish", **kwargs)
+    return model

+ 120 - 66
paddlers/models/ppseg/models/backbones/stdcnet.py

@@ -17,9 +17,9 @@ import math
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 
 
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.cvlibs import manager, param_init
-from paddlers.models.ppseg.models.layers.layer_libs import SyncBatchNorm
+from paddleseg.utils import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models.layers.layer_libs import SyncBatchNorm
 
 
 __all__ = ["STDC1", "STDC2"]
 __all__ = ["STDC1", "STDC2"]
 
 
@@ -37,9 +37,9 @@ class STDCNet(nn.Layer):
         layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3].
         layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3].
         block_num(int,optional): block_num of features block. Default: 4.
         block_num(int,optional): block_num of features block. Default: 4.
         type(str,optional): feature fusion method "cat"/"add". Default: "cat".
         type(str,optional): feature fusion method "cat"/"add". Default: "cat".
-        num_classes(int, optional): class number for image classification. Default: 1000.
-        dropout(float,optional): dropout ratio. if >0,use dropout ratio.  Default: 0.20.
-        use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False.
+        relative_lr(float,optional): parameters here receive a different learning rate when updating. The effective 
+            learning rate is the product of relative_lr and the global learning rate. Default: 1.0.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained(str, optional): the path of pretrained model.
         pretrained(str, optional): the path of pretrained model.
     """
     """
 
 
@@ -48,34 +48,18 @@ class STDCNet(nn.Layer):
                  layers=[4, 5, 3],
                  layers=[4, 5, 3],
                  block_num=4,
                  block_num=4,
                  type="cat",
                  type="cat",
-                 num_classes=1000,
-                 dropout=0.20,
-                 use_conv_last=False,
+                 relative_lr=1.0,
+                 in_channels=3,
                  pretrained=None):
                  pretrained=None):
         super(STDCNet, self).__init__()
         super(STDCNet, self).__init__()
         if type == "cat":
         if type == "cat":
             block = CatBottleneck
             block = CatBottleneck
         elif type == "add":
         elif type == "add":
             block = AddBottleneck
             block = AddBottleneck
-        self.use_conv_last = use_conv_last
-        self.features = self._make_layers(base, layers, block_num, block)
-        self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1)
-
-        if (layers == [4, 5, 3]):  #stdc1446
-            self.x2 = nn.Sequential(self.features[:1])
-            self.x4 = nn.Sequential(self.features[1:2])
-            self.x8 = nn.Sequential(self.features[2:6])
-            self.x16 = nn.Sequential(self.features[6:11])
-            self.x32 = nn.Sequential(self.features[11:])
-        elif (layers == [2, 2, 2]):  #stdc813
-            self.x2 = nn.Sequential(self.features[:1])
-            self.x4 = nn.Sequential(self.features[1:2])
-            self.x8 = nn.Sequential(self.features[2:4])
-            self.x16 = nn.Sequential(self.features[4:6])
-            self.x32 = nn.Sequential(self.features[6:])
-        else:
-            raise NotImplementedError(
-                "model with layers:{} is not implemented!".format(layers))
+        self.layers = layers
+        self.feat_channels = [base // 2, base, base * 4, base * 8, base * 16]
+        self.features = self._make_layers(in_channels, base, layers, block_num,
+                                          block, relative_lr)
 
 
         self.pretrained = pretrained
         self.pretrained = pretrained
         self.init_weight()
         self.init_weight()
@@ -84,32 +68,42 @@ class STDCNet(nn.Layer):
         """
         """
         forward function for feature extract.
         forward function for feature extract.
         """
         """
-        feat2 = self.x2(x)
-        feat4 = self.x4(feat2)
-        feat8 = self.x8(feat4)
-        feat16 = self.x16(feat8)
-        feat32 = self.x32(feat16)
-        if self.use_conv_last:
-            feat32 = self.conv_last(feat32)
-        return feat2, feat4, feat8, feat16, feat32
-
-    def _make_layers(self, base, layers, block_num, block):
+        out_feats = []
+
+        x = self.features[0](x)
+        out_feats.append(x)
+        x = self.features[1](x)
+        out_feats.append(x)
+
+        idx = [[2, 2 + self.layers[0]],
+               [2 + self.layers[0], 2 + sum(self.layers[0:2])],
+               [2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
+        for start_idx, end_idx in idx:
+            for i in range(start_idx, end_idx):
+                x = self.features[i](x)
+            out_feats.append(x)
+
+        return out_feats
+
+    def _make_layers(self, in_channels, base, layers, block_num, block,
+                     relative_lr):
         features = []
         features = []
-        features += [ConvBNRelu(3, base // 2, 3, 2)]
-        features += [ConvBNRelu(base // 2, base, 3, 2)]
+        features += [ConvBNRelu(in_channels, base // 2, 3, 2, relative_lr)]
+        features += [ConvBNRelu(base // 2, base, 3, 2, relative_lr)]
 
 
         for i, layer in enumerate(layers):
         for i, layer in enumerate(layers):
             for j in range(layer):
             for j in range(layer):
                 if i == 0 and j == 0:
                 if i == 0 and j == 0:
-                    features.append(block(base, base * 4, block_num, 2))
+                    features.append(
+                        block(base, base * 4, block_num, 2, relative_lr))
                 elif j == 0:
                 elif j == 0:
                     features.append(
                     features.append(
                         block(base * int(math.pow(2, i + 1)), base * int(
                         block(base * int(math.pow(2, i + 1)), base * int(
-                            math.pow(2, i + 2)), block_num, 2))
+                            math.pow(2, i + 2)), block_num, 2, relative_lr))
                 else:
                 else:
                     features.append(
                     features.append(
                         block(base * int(math.pow(2, i + 2)), base * int(
                         block(base * int(math.pow(2, i + 2)), base * int(
-                            math.pow(2, i + 2)), block_num, 1))
+                            math.pow(2, i + 2)), block_num, 1, relative_lr))
 
 
         return nn.Sequential(*features)
         return nn.Sequential(*features)
 
 
@@ -125,16 +119,24 @@ class STDCNet(nn.Layer):
 
 
 
 
 class ConvBNRelu(nn.Layer):
 class ConvBNRelu(nn.Layer):
-    def __init__(self, in_planes, out_planes, kernel=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 kernel=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(ConvBNRelu, self).__init__()
         super(ConvBNRelu, self).__init__()
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         self.conv = nn.Conv2D(
         self.conv = nn.Conv2D(
             in_planes,
             in_planes,
             out_planes,
             out_planes,
             kernel_size=kernel,
             kernel_size=kernel,
             stride=stride,
             stride=stride,
             padding=kernel // 2,
             padding=kernel // 2,
+            weight_attr=param_attr,
             bias_attr=False)
             bias_attr=False)
-        self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
+        self.bn = nn.BatchNorm2D(
+            out_planes, weight_attr=param_attr, bias_attr=param_attr)
         self.relu = nn.ReLU()
         self.relu = nn.ReLU()
 
 
     def forward(self, x):
     def forward(self, x):
@@ -143,11 +145,17 @@ class ConvBNRelu(nn.Layer):
 
 
 
 
 class AddBottleneck(nn.Layer):
 class AddBottleneck(nn.Layer):
-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 block_num=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(AddBottleneck, self).__init__()
         super(AddBottleneck, self).__init__()
         assert block_num > 1, "block number should be larger than 1."
         assert block_num > 1, "block number should be larger than 1."
         self.conv_list = nn.LayerList()
         self.conv_list = nn.LayerList()
         self.stride = stride
         self.stride = stride
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         if stride == 2:
         if stride == 2:
             self.avd_layer = nn.Sequential(
             self.avd_layer = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
@@ -157,8 +165,12 @@ class AddBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=out_planes // 2,
                     groups=out_planes // 2,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(out_planes // 2), )
+                nn.BatchNorm2D(
+                    out_planes // 2,
+                    weight_attr=param_attr,
+                    bias_attr=param_attr), )
             self.skip = nn.Sequential(
             self.skip = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
                     in_planes,
                     in_planes,
@@ -167,34 +179,53 @@ class AddBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=in_planes,
                     groups=in_planes,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(in_planes),
+                nn.BatchNorm2D(
+                    in_planes, weight_attr=param_attr, bias_attr=param_attr),
                 nn.Conv2D(
                 nn.Conv2D(
-                    in_planes, out_planes, kernel_size=1, bias_attr=False),
-                nn.BatchNorm2D(out_planes), )
+                    in_planes,
+                    out_planes,
+                    kernel_size=1,
+                    bias_attr=False,
+                    weight_attr=param_attr),
+                nn.BatchNorm2D(
+                    out_planes, weight_attr=param_attr, bias_attr=param_attr), )
             stride = 1
             stride = 1
 
 
         for idx in range(block_num):
         for idx in range(block_num):
             if idx == 0:
             if idx == 0:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        in_planes, out_planes // 2, kernel=1))
+                        in_planes,
+                        out_planes // 2,
+                        kernel=1,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num == 2:
             elif idx == 1 and block_num == 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 2, stride=stride))
+                        out_planes // 2,
+                        out_planes // 2,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num > 2:
             elif idx == 1 and block_num > 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 4, stride=stride))
+                        out_planes // 2,
+                        out_planes // 4,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx < block_num - 1:
             elif idx < block_num - 1:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx + 1))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx + 1)),
+                        relative_lr=relative_lr))
             else:
             else:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx))))
+                    # relative_lr is an argument of ConvBNRelu, not of
+                    # conv_list.append(), which accepts only the sublayer.
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx)),
+                        relative_lr=relative_lr))
 
 
     def forward(self, x):
     def forward(self, x):
         out_list = []
         out_list = []
@@ -211,11 +242,17 @@ class AddBottleneck(nn.Layer):
 
 
 
 
 class CatBottleneck(nn.Layer):
 class CatBottleneck(nn.Layer):
-    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
+    def __init__(self,
+                 in_planes,
+                 out_planes,
+                 block_num=3,
+                 stride=1,
+                 relative_lr=1.0):
         super(CatBottleneck, self).__init__()
         super(CatBottleneck, self).__init__()
         assert block_num > 1, "block number should be larger than 1."
         assert block_num > 1, "block number should be larger than 1."
         self.conv_list = nn.LayerList()
         self.conv_list = nn.LayerList()
         self.stride = stride
         self.stride = stride
+        param_attr = paddle.ParamAttr(learning_rate=relative_lr)
         if stride == 2:
         if stride == 2:
             self.avd_layer = nn.Sequential(
             self.avd_layer = nn.Sequential(
                 nn.Conv2D(
                 nn.Conv2D(
@@ -225,8 +262,12 @@ class CatBottleneck(nn.Layer):
                     stride=2,
                     stride=2,
                     padding=1,
                     padding=1,
                     groups=out_planes // 2,
                     groups=out_planes // 2,
+                    weight_attr=param_attr,
                     bias_attr=False),
                     bias_attr=False),
-                nn.BatchNorm2D(out_planes // 2), )
+                nn.BatchNorm2D(
+                    out_planes // 2,
+                    weight_attr=param_attr,
+                    bias_attr=param_attr), )
             self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
             self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
             stride = 1
             stride = 1
 
 
@@ -234,23 +275,36 @@ class CatBottleneck(nn.Layer):
             if idx == 0:
             if idx == 0:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        in_planes, out_planes // 2, kernel=1))
+                        in_planes,
+                        out_planes // 2,
+                        kernel=1,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num == 2:
             elif idx == 1 and block_num == 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 2, stride=stride))
+                        out_planes // 2,
+                        out_planes // 2,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx == 1 and block_num > 2:
             elif idx == 1 and block_num > 2:
                 self.conv_list.append(
                 self.conv_list.append(
                     ConvBNRelu(
                     ConvBNRelu(
-                        out_planes // 2, out_planes // 4, stride=stride))
+                        out_planes // 2,
+                        out_planes // 4,
+                        stride=stride,
+                        relative_lr=relative_lr))
             elif idx < block_num - 1:
             elif idx < block_num - 1:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx + 1))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx + 1)),
+                        relative_lr=relative_lr))
             else:
             else:
                 self.conv_list.append(
                 self.conv_list.append(
-                    ConvBNRelu(out_planes // int(math.pow(2, idx)), out_planes
-                               // int(math.pow(2, idx))))
+                    ConvBNRelu(
+                        out_planes // int(math.pow(2, idx)),
+                        out_planes // int(math.pow(2, idx)),
+                        relative_lr=relative_lr))
 
 
     def forward(self, x):
     def forward(self, x):
         out_list = []
         out_list = []

+ 7 - 7
paddlers/models/ppseg/models/backbones/swin_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 import numpy as np
 import numpy as np
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.models.backbones.transformer_utils import *
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.models.backbones.transformer_utils import *
 
 
 
 
 class Mlp(nn.Layer):
 class Mlp(nn.Layer):
@@ -531,7 +531,7 @@ class SwinTransformer(nn.Layer):
     Args:
     Args:
         pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
         pretrain_img_size (int): Input image size for training the pretrained model, used in absolute postion embedding. Default: 224.
         patch_size (int | tuple(int)): Patch size. Default: 4.
         patch_size (int | tuple(int)): Patch size. Default: 4.
-        in_chans (int): Number of input image channels. Default: 3.
+        in_channels (int): Number of input image channels. Default: 3.
         embed_dim (int): Number of linear projection output channels. Default: 96.
         embed_dim (int): Number of linear projection output channels. Default: 96.
         depths (tuple[int]): Depths of each Swin Transformer stage.
         depths (tuple[int]): Depths of each Swin Transformer stage.
         num_heads (tuple[int]): Number of attention head of each stage.
         num_heads (tuple[int]): Number of attention head of each stage.
@@ -553,7 +553,7 @@ class SwinTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  pretrain_img_size=224,
                  pretrain_img_size=224,
                  patch_size=4,
                  patch_size=4,
-                 in_chans=3,
+                 in_channels=3,
                  embed_dim=96,
                  embed_dim=96,
                  depths=[2, 2, 6, 2],
                  depths=[2, 2, 6, 2],
                  num_heads=[3, 6, 12, 24],
                  num_heads=[3, 6, 12, 24],
@@ -583,7 +583,7 @@ class SwinTransformer(nn.Layer):
         # split image into non-overlapping patches
         # split image into non-overlapping patches
         self.patch_embed = PatchEmbed(
         self.patch_embed = PatchEmbed(
             patch_size=patch_size,
             patch_size=patch_size,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dim,
             embed_dim=embed_dim,
             norm_layer=norm_layer if self.patch_norm else None)
             norm_layer=norm_layer if self.patch_norm else None)
 
 

+ 716 - 0
paddlers/models/ppseg/models/backbones/top_transformer.py

@@ -0,0 +1,716 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This file refers to https://github.com/hustvl/TopFormer and https://github.com/BR-IDL/PaddleViT
+"""
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+from paddleseg import utils
+from paddleseg.models.backbones.transformer_utils import Identity, DropPath
+
+__all__ = ["TopTransformer_Base", "TopTransformer_Small", "TopTransformer_Tiny"]
+
+
+def make_divisible(val, divisor, min_value=None):
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(val + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * val:
+        new_v += divisor
+    return new_v
+
+
+class HSigmoid(nn.Layer):
+    def __init__(self, inplace=True):
+        super().__init__()
+        self.relu = nn.ReLU6()
+
+    def forward(self, x):
+        return self.relu(x + 3) / 6
+
+
+class Conv2DBN(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ks=1,
+                 stride=1,
+                 pad=0,
+                 dilation=1,
+                 groups=1,
+                 bn_weight_init=1,
+                 lr_mult=1.0):
+        super().__init__()
+        conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.c = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=ks,
+            stride=stride,
+            padding=pad,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+        bn_weight_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(bn_weight_init),
+            learning_rate=lr_mult)
+        bn_bias_attr = paddle.ParamAttr(
+            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
+        self.bn = nn.BatchNorm2D(
+            out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)
+
+    def forward(self, inputs):
+        out = self.c(inputs)
+        out = self.bn(out)
+        return out
+
+
+class ConvBNAct(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 groups=1,
+                 norm=nn.BatchNorm2D,
+                 act=None,
+                 bias_attr=False,
+                 lr_mult=1.0):
+        super(ConvBNAct, self).__init__()
+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=param_attr,
+            bias_attr=param_attr if bias_attr else False)
+        self.act = act() if act is not None else Identity()
+        self.bn = norm(out_channels, weight_attr=param_attr, bias_attr=param_attr) \
+            if norm is not None else Identity()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.act(x)
+        return x
+
+
+class MLP(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.ReLU,
+                 drop=0.,
+                 lr_mult=1.0):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
+        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
+        self.dwconv = nn.Conv2D(
+            hidden_features,
+            hidden_features,
+            3,
+            1,
+            1,
+            groups=hidden_features,
+            weight_attr=param_attr,
+            bias_attr=param_attr)
+        self.act = act_layer()
+        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.dwconv(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class InvertedResidual(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 expand_ratio,
+                 activations=None,
+                 lr_mult=1.0):
+        super(InvertedResidual, self).__init__()
+        assert stride in [1, 2], "The stride should be 1 or 2."
+
+        if activations is None:
+            activations = nn.ReLU
+
+        hidden_dim = int(round(in_channels * expand_ratio))
+        self.use_res_connect = stride == 1 and in_channels == out_channels
+
+        layers = []
+        if expand_ratio != 1:
+            layers.append(
+                Conv2DBN(
+                    in_channels, hidden_dim, ks=1, lr_mult=lr_mult))
+            layers.append(activations())
+        layers.extend([
+            Conv2DBN(
+                hidden_dim,
+                hidden_dim,
+                ks=kernel_size,
+                stride=stride,
+                pad=kernel_size // 2,
+                groups=hidden_dim,
+                lr_mult=lr_mult), activations(), Conv2DBN(
+                    hidden_dim, out_channels, ks=1, lr_mult=lr_mult)
+        ])
+        self.conv = nn.Sequential(*layers)
+        self.out_channels = out_channels
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+class TokenPyramidModule(nn.Layer):
+    """
+    A convolutional stem followed by a chain of InvertedResidual layers.
+
+    Args:
+        cfgs (list): One (k, t, c, s) entry per layer, used as kernel size,
+            expand ratio, base output channels (scaled by width_mult and
+            rounded to a multiple of 8) and stride of that layer.
+        out_indices (list|tuple): Indices of the layers whose outputs are
+            collected by forward.
+        in_channels (int, optional): The channels of input image. Default: 3.
+        inp_channel (int, optional): Output channels of the stem, i.e. the
+            input channels of the first layer. Default: 16.
+        activation (callable, optional): Called without arguments to build each
+            activation layer. Default: nn.ReLU.
+        width_mult (float, optional): Multiplier applied to the output channels
+            of every layer. Default: 1.0.
+        lr_mult (float, optional): Learning rate multiplier passed on to the
+            stem and to every layer. Default: 1.0.
+    """
+
+    def __init__(self,
+                 cfgs,
+                 out_indices,
+                 in_channels=3,
+                 inp_channel=16,
+                 activation=nn.ReLU,
+                 width_mult=1.,
+                 lr_mult=1.):
+        super().__init__()
+        self.out_indices = out_indices
+
+        self.stem = nn.Sequential(
+            Conv2DBN(
+                in_channels, inp_channel, 3, 2, 1, lr_mult=lr_mult),
+            activation())
+
+        # Only the sublayer names are kept here; the layers themselves are
+        # registered with add_sublayer and looked up by name in forward.
+        self.layers = []
+        for i, (k, t, c, s) in enumerate(cfgs):
+            output_channel = make_divisible(c * width_mult, 8)
+            layer_name = 'layer{}'.format(i + 1)
+            layer = InvertedResidual(
+                inp_channel,
+                output_channel,
+                kernel_size=k,
+                stride=s,
+                expand_ratio=t,
+                activations=activation,
+                lr_mult=lr_mult)
+            self.add_sublayer(layer_name, layer)
+            self.layers.append(layer_name)
+            # The next layer consumes what this one produces.
+            inp_channel = output_channel
+
+    def forward(self, x):
+        """
+        Run the stem and all layers, returning the outputs of the layers
+        listed in out_indices (in layer order) as a list.
+        """
+        outs = []
+        x = self.stem(x)
+        for i, layer_name in enumerate(self.layers):
+            layer = getattr(self, layer_name)
+            x = layer(x)
+            if i in self.out_indices:
+                outs.append(x)
+        return outs
+
+
+class Attention(nn.Layer):
+    """
+    Multi-head self-attention on 4-D feature maps, with Conv2DBN (1x1 conv + BN)
+    projections for query, key, value and output.
+
+    Args:
+        dim (int): Channels of the input and of the output.
+        key_dim (int): Channels per head of the query and key projections.
+        num_heads (int): Number of attention heads.
+        attn_ratio (int|float, optional): Each head's value projection has
+            int(attn_ratio * key_dim) channels. Default: 4.
+        activation (callable, optional): Called without arguments to build the
+            activation placed before the output projection. nn.ReLU is used
+            when it is None. Default: None.
+        lr_mult (float, optional): Learning rate multiplier passed on to every
+            Conv2DBN. Default: 1.0.
+    """
+
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 attn_ratio=4,
+                 activation=None,
+                 lr_mult=1.0):
+        super().__init__()
+        if activation is None:
+            # Same fallback as InvertedResidual. Without it the default value
+            # fails below with "'NoneType' object is not callable".
+            activation = nn.ReLU
+
+        self.num_heads = num_heads
+        # NOTE: scale is stored but forward does not apply it to the logits.
+        self.scale = key_dim**-0.5
+        self.key_dim = key_dim
+        self.nh_kd = nh_kd = key_dim * num_heads
+        self.d = int(attn_ratio * key_dim)
+        self.dh = int(attn_ratio * key_dim) * num_heads
+        self.attn_ratio = attn_ratio
+
+        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
+        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)
+
+        # The BN weight of the output projection is initialized to 0.
+        self.proj = nn.Sequential(
+            activation(),
+            Conv2DBN(
+                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))
+
+    def forward(self, x):
+        """
+        Attend over all spatial positions of x and return a tensor with the
+        same height and width and dim channels.
+        """
+        x_shape = paddle.shape(x)
+        H, W = x_shape[2], x_shape[3]
+
+        # A 0 in the target shape keeps the batch dimension of the input.
+        # qq: [B, heads, H*W, key_dim]
+        qq = self.to_q(x).reshape(
+            [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
+        # kk: [B, heads, key_dim, H*W]
+        kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
+        # vv: [B, heads, H*W, d]
+        vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
+            [0, 1, 3, 2])
+
+        # attn: [B, heads, H*W, H*W], normalized over the key positions.
+        attn = paddle.matmul(qq, kk)
+        attn = F.softmax(attn, axis=-1)
+
+        xx = paddle.matmul(attn, vv)
+
+        # Merge the heads back into channels and restore the spatial layout.
+        xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
+        xx = self.proj(xx)
+        return xx
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.ReLU,
+                 lr_mult=1.0):
+        super().__init__()
+        self.dim = dim
+        self.num_heads = num_heads
+        self.mlp_ratios = mlp_ratios
+
+        self.attn = Attention(
+            dim,
+            key_dim=key_dim,
+            num_heads=num_heads,
+            attn_ratio=attn_ratio,
+            activation=act_layer,
+            lr_mult=lr_mult)
+
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        mlp_hidden_dim = int(dim * mlp_ratios)
+        self.mlp = MLP(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop,
+                       lr_mult=lr_mult)
+
+    def forward(self, x):
+        h = x
+        x = self.attn(x)
+        x = self.drop_path(x)
+        x = h + x
+
+        h = x
+        x = self.mlp(x)
+        x = self.drop_path(x)
+        x = x + h
+        return x
+
+
+class BasicLayer(nn.Layer):
+    def __init__(self,
+                 block_num,
+                 embedding_dim,
+                 key_dim,
+                 num_heads,
+                 mlp_ratios=4.,
+                 attn_ratio=2.,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=None,
+                 lr_mult=1.0):
+        super().__init__()
+        self.block_num = block_num
+
+        self.transformer_blocks = nn.LayerList()
+        for i in range(self.block_num):
+            self.transformer_blocks.append(
+                Block(
+                    embedding_dim,
+                    key_dim=key_dim,
+                    num_heads=num_heads,
+                    mlp_ratios=mlp_ratios,
+                    attn_ratio=attn_ratio,
+                    drop=drop,
+                    drop_path=drop_path[i]
+                    if isinstance(drop_path, list) else drop_path,
+                    act_layer=act_layer,
+                    lr_mult=lr_mult))
+
+    def forward(self, x):
+        # token * N 
+        for i in range(self.block_num):
+            x = self.transformer_blocks[i](x)
+        return x
+
+
+class PyramidPoolAgg(nn.Layer):
+    def __init__(self, stride):
+        super().__init__()
+        self.stride = stride
+        self.tmp = Identity()  # avoid the error of paddle.flops
+
+    def forward(self, inputs):
+        '''
+        # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
+        # so exporting the inference model will raise error.
+        _, _, H, W = inputs[-1].shape
+        H = (H - 1) // self.stride + 1
+        W = (W - 1) // self.stride + 1
+        return paddle.concat(
+            [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
+        '''
+        out = []
+        ks = 2**len(inputs)
+        stride = self.stride**len(inputs)
+        for x in inputs:
+            x = F.avg_pool2d(x, int(ks), int(stride))
+            ks /= 2
+            stride /= 2
+            out.append(x)
+        out = paddle.concat(out, axis=1)
+        return out
+
+
+class InjectionMultiSum(nn.Layer):
+    def __init__(self, in_channels, out_channels, activations=None,
+                 lr_mult=1.0):
+        super(InjectionMultiSum, self).__init__()
+
+        self.local_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.global_act = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, lr_mult=lr_mult)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_global):
+        xl_hw = paddle.shape(x_low)[2:]
+        local_feat = self.local_embedding(x_low)
+
+        global_act = self.global_act(x_global)
+        sig_act = F.interpolate(
+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
+
+        global_feat = self.global_embedding(x_global)
+        global_feat = F.interpolate(
+            global_feat, xl_hw, mode='bilinear', align_corners=False)
+
+        out = local_feat * sig_act + global_feat
+        return out
+
+
+class InjectionMultiSumCBR(nn.Layer):
+    def __init__(self, in_channels, out_channels, activations=None):
+        '''
+        local_embedding: conv-bn-relu
+        global_embedding: conv-bn-relu
+        global_act: conv
+        '''
+        super(InjectionMultiSumCBR, self).__init__()
+
+        self.local_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1)
+        self.global_embedding = ConvBNAct(
+            in_channels, out_channels, kernel_size=1)
+        self.global_act = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, norm=None, act=None)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_global):
+        xl_hw = paddle.shape(x)[2:]
+        local_feat = self.local_embedding(x_low)
+        # kernel
+        global_act = self.global_act(x_global)
+        global_act = F.interpolate(
+            self.act(global_act), xl_hw, mode='bilinear', align_corners=False)
+        # feat_h
+        global_feat = self.global_embedding(x_global)
+        global_feat = F.interpolate(
+            global_feat, xl_hw, mode='bilinear', align_corners=False)
+        out = local_feat * global_act + global_feat
+        return out
+
+
+class FuseBlockSum(nn.Layer):
+    def __init__(self, in_channels, out_channels, activations=None):
+        super(FuseBlockSum, self).__init__()
+
+        self.fuse1 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None)
+        self.fuse2 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None)
+
+    def forward(self, x_low, x_high):
+        xl_hw = paddle.shape(x)[2:]
+        inp = self.fuse1(x_low)
+        kernel = self.fuse2(x_high)
+        feat_h = F.interpolate(
+            kernel, xl_hw, mode='bilinear', align_corners=False)
+        out = inp + feat_h
+        return out
+
+
+class FuseBlockMulti(nn.Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            stride=1,
+            activations=None, ):
+        super(FuseBlockMulti, self).__init__()
+        assert stride in [1, 2], "The stride should be 1 or 2."
+
+        self.fuse1 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None)
+        self.fuse2 = ConvBNAct(
+            in_channels, out_channels, kernel_size=1, act=None)
+        self.act = HSigmoid()
+
+    def forward(self, x_low, x_high):
+        xl_hw = paddle.shape(x)[2:]
+        inp = self.fuse1(x_low)
+        sig_act = self.fuse2(x_high)
+        sig_act = F.interpolate(
+            self.act(sig_act), xl_hw, mode='bilinear', align_corners=False)
+        out = inp * sig_act
+        return out
+
+
+SIM_BLOCK = {
+    "fuse_sum": FuseBlockSum,
+    "fuse_multi": FuseBlockMulti,
+    "multi_sum": InjectionMultiSum,
+    "multi_sum_cbr": InjectionMultiSumCBR,
+}
+
+
+class TopTransformer(nn.Layer):
+    def __init__(self,
+                 cfgs,
+                 injection_out_channels,
+                 encoder_out_indices,
+                 trans_out_indices=[1, 2, 3],
+                 depths=4,
+                 key_dim=16,
+                 num_heads=8,
+                 attn_ratios=2,
+                 mlp_ratios=2,
+                 c2t_stride=2,
+                 drop_path_rate=0.,
+                 act_layer=nn.ReLU6,
+                 injection_type="muli_sum",
+                 injection=True,
+                 lr_mult=1.0,
+                 in_channels=3,
+                 pretrained=None):
+        super().__init__()
+        self.feat_channels = [
+            c[2] for i, c in enumerate(cfgs) if i in encoder_out_indices
+        ]
+        self.injection_out_channels = injection_out_channels
+        self.injection = injection
+        self.embed_dim = sum(self.feat_channels)
+        self.trans_out_indices = trans_out_indices
+
+        self.tpm = TokenPyramidModule(
+            cfgs=cfgs,
+            out_indices=encoder_out_indices,
+            in_channels=in_channels,
+            lr_mult=lr_mult)
+        self.ppa = PyramidPoolAgg(stride=c2t_stride)
+
+        dpr = [x.item() for x in \
+               paddle.linspace(0, drop_path_rate, depths)]
+        self.trans = BasicLayer(
+            block_num=depths,
+            embedding_dim=self.embed_dim,
+            key_dim=key_dim,
+            num_heads=num_heads,
+            mlp_ratios=mlp_ratios,
+            attn_ratio=attn_ratios,
+            drop=0,
+            attn_drop=0,
+            drop_path=dpr,
+            act_layer=act_layer,
+            lr_mult=lr_mult)
+
+        self.SIM = nn.LayerList()
+        inj_module = SIM_BLOCK[injection_type]
+        if self.injection:
+            for i in range(len(self.feat_channels)):
+                if i in trans_out_indices:
+                    self.SIM.append(
+                        inj_module(
+                            self.feat_channels[i],
+                            injection_out_channels[i],
+                            activations=act_layer,
+                            lr_mult=lr_mult))
+                else:
+                    self.SIM.append(Identity())
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, x):
+        ouputs = self.tpm(x)
+        out = self.ppa(ouputs)
+        out = self.trans(out)
+
+        if self.injection:
+            xx = out.split(self.feat_channels, axis=1)
+            results = []
+            for i in range(len(self.feat_channels)):
+                if i in self.trans_out_indices:
+                    local_tokens = ouputs[i]
+                    global_semantics = xx[i]
+                    out_ = self.SIM[i](local_tokens, global_semantics)
+                    results.append(out_)
+            return results
+        else:
+            ouputs.append(out)
+            return ouputs
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Base(**kwargs):
+    cfgs = [
+        # k,  t,  c, s
+        [3, 1, 16, 1],  # 1/2        
+        [3, 4, 32, 2],  # 1/4 1      
+        [3, 3, 32, 1],  #            
+        [5, 3, 64, 2],  # 1/8 3      
+        [5, 3, 64, 1],  #            
+        [3, 3, 128, 2],  # 1/16 5     
+        [3, 3, 128, 1],  #            
+        [5, 6, 160, 2],  # 1/32 7     
+        [5, 6, 160, 1],  #            
+        [3, 6, 160, 1],  #            
+    ]
+
+    model = TopTransformer(
+        cfgs=cfgs,
+        injection_out_channels=[None, 256, 256, 256],
+        encoder_out_indices=[2, 4, 6, 9],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=8,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Small(**kwargs):
+    cfgs = [
+        # k,  t,  c, s
+        [3, 1, 16, 1],  # 1/2        
+        [3, 4, 24, 2],  # 1/4 1      
+        [3, 3, 24, 1],  #            
+        [5, 3, 48, 2],  # 1/8 3      
+        [5, 3, 48, 1],  #            
+        [3, 3, 96, 2],  # 1/16 5     
+        [3, 3, 96, 1],  #            
+        [5, 6, 128, 2],  # 1/32 7     
+        [5, 6, 128, 1],  #            
+        [3, 6, 128, 1],  #           
+    ]
+
+    model = TopTransformer(
+        cfgs=cfgs,
+        injection_out_channels=[None, 192, 192, 192],
+        encoder_out_indices=[2, 4, 6, 9],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=6,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)
+    return model
+
+
+@manager.BACKBONES.add_component
+def TopTransformer_Tiny(**kwargs):
+    cfgs = [
+        # k,  t,  c, s
+        [3, 1, 16, 1],  # 1/2       
+        [3, 4, 16, 2],  # 1/4 1      
+        [3, 3, 16, 1],  #            
+        [5, 3, 32, 2],  # 1/8 3      
+        [5, 3, 32, 1],  #            
+        [3, 3, 64, 2],  # 1/16 5     
+        [3, 3, 64, 1],  #            
+        [5, 6, 96, 2],  # 1/32 7     
+        [5, 6, 96, 1],  #               
+    ]
+
+    model = TopTransformer(
+        cfgs=cfgs,
+        injection_out_channels=[None, 128, 128, 128],
+        encoder_out_indices=[2, 4, 6, 8],
+        trans_out_indices=[1, 2, 3],
+        depths=4,
+        key_dim=16,
+        num_heads=4,
+        attn_ratios=2,
+        mlp_ratios=2,
+        c2t_stride=2,
+        drop_path_rate=0.,
+        act_layer=nn.ReLU6,
+        injection_type="multi_sum",
+        injection=True,
+        **kwargs)
+    return model

+ 2 - 2
paddlers/models/ppseg/models/backbones/transformer_utils.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ def drop_path(x, drop_prob=0., training=False):
         return x
         return x
     keep_prob = paddle.to_tensor(1 - drop_prob)
     keep_prob = paddle.to_tensor(1 - drop_prob)
     shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
     shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
-    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
+    random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
     random_tensor = paddle.floor(random_tensor)  # binarize
     random_tensor = paddle.floor(random_tensor)  # binarize
     output = x.divide(keep_prob) * random_tensor
     output = x.divide(keep_prob) * random_tensor
     return output
     return output

+ 6 - 6
paddlers/models/ppseg/models/backbones/vision_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -20,9 +20,9 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 import numpy as np
 import numpy as np
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils, logger
-from paddlers.models.ppseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils, logger
+from paddleseg.models.backbones.transformer_utils import to_2tuple, DropPath, Identity
 
 
 
 
 class Mlp(nn.Layer):
 class Mlp(nn.Layer):
@@ -154,7 +154,7 @@ class VisionTransformer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  img_size=224,
                  img_size=224,
                  patch_size=16,
                  patch_size=16,
-                 in_chans=3,
+                 in_channels=3,
                  embed_dim=768,
                  embed_dim=768,
                  depth=12,
                  depth=12,
                  num_heads=12,
                  num_heads=12,
@@ -176,7 +176,7 @@ class VisionTransformer(nn.Layer):
         self.patch_embed = PatchEmbed(
         self.patch_embed = PatchEmbed(
             img_size=img_size,
             img_size=img_size,
             patch_size=patch_size,
             patch_size=patch_size,
-            in_chans=in_chans,
+            in_chans=in_channels,
             embed_dim=embed_dim)
             embed_dim=embed_dim)
         self.pos_w = self.patch_embed.num_patches_in_w
         self.pos_w = self.patch_embed.num_patches_in_w
         self.pos_h = self.patch_embed.num_patches_in_h
         self.pos_h = self.patch_embed.num_patches_in_h

+ 11 - 6
paddlers/models/ppseg/models/backbones/xception_deeplab.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -15,9 +15,9 @@
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.models import layers
 
 
 __all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
 __all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
 
 
@@ -255,12 +255,17 @@ class XceptionDeeplab(nn.Layer):
 
 
      Args:
      Args:
          backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
          backbone (str): Which type of Xception_DeepLab to select. It should be one of ('xception_41', 'xception_65', 'xception_71').
+         in_channels (int, optional): The channels of input image. Default: 3.
          pretrained (str, optional): The path of pretrained model.
          pretrained (str, optional): The path of pretrained model.
          output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
          output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 16.
 
 
     """
     """
 
 
-    def __init__(self, backbone, pretrained=None, output_stride=16):
+    def __init__(self,
+                 backbone,
+                 in_channels=3,
+                 pretrained=None,
+                 output_stride=16):
 
 
         super(XceptionDeeplab, self).__init__()
         super(XceptionDeeplab, self).__init__()
 
 
@@ -269,7 +274,7 @@ class XceptionDeeplab(nn.Layer):
         self.feat_channels = [128, 2048]
         self.feat_channels = [128, 2048]
 
 
         self._conv1 = ConvBNLayer(
         self._conv1 = ConvBNLayer(
-            3,
+            in_channels,
             32,
             32,
             3,
             3,
             stride=2,
             stride=2,

+ 13 - 11
paddlers/models/ppseg/models/bisenet.py

@@ -18,9 +18,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg import utils
-from paddlers.models.ppseg.cvlibs import manager, param_init
-from paddlers.models.ppseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models import layers
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component
@@ -35,6 +35,7 @@ class BiSeNetV2(nn.Layer):
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
         lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
         lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
     """
     """
 
 
@@ -42,6 +43,7 @@ class BiSeNetV2(nn.Layer):
                  num_classes,
                  num_classes,
                  lambd=0.25,
                  lambd=0.25,
                  align_corners=False,
                  align_corners=False,
+                 in_channels=3,
                  pretrained=None):
                  pretrained=None):
         super().__init__()
         super().__init__()
 
 
@@ -51,8 +53,8 @@ class BiSeNetV2(nn.Layer):
         sb_channels = (C1, C3, C4, C5)
         sb_channels = (C1, C3, C4, C5)
         mid_channels = 128
         mid_channels = 128
 
 
-        self.db = DetailBranch(db_channels)
-        self.sb = SemanticBranch(sb_channels)
+        self.db = DetailBranch(in_channels, db_channels)
+        self.sb = SemanticBranch(in_channels, sb_channels)
 
 
         self.bga = BGA(mid_channels, align_corners)
         self.bga = BGA(mid_channels, align_corners)
         self.aux_head1 = SegHead(C1, C1, num_classes)
         self.aux_head1 = SegHead(C1, C1, num_classes)
@@ -189,15 +191,15 @@ class GatherAndExpansionLayer2(nn.Layer):
 class DetailBranch(nn.Layer):
 class DetailBranch(nn.Layer):
     """The detail branch of BiSeNet, which has wide channels but shallow layers."""
     """The detail branch of BiSeNet, which has wide channels but shallow layers."""
 
 
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, feature_channels):
         super().__init__()
         super().__init__()
 
 
-        C1, C2, C3 = in_channels
+        C1, C2, C3 = feature_channels
 
 
         self.convs = nn.Sequential(
         self.convs = nn.Sequential(
             # stage 1
             # stage 1
             layers.ConvBNReLU(
             layers.ConvBNReLU(
-                3, C1, 3, stride=2),
+                in_channels, C1, 3, stride=2),
             layers.ConvBNReLU(C1, C1, 3),
             layers.ConvBNReLU(C1, C1, 3),
             # stage 2
             # stage 2
             layers.ConvBNReLU(
             layers.ConvBNReLU(
@@ -217,11 +219,11 @@ class DetailBranch(nn.Layer):
 class SemanticBranch(nn.Layer):
 class SemanticBranch(nn.Layer):
     """The semantic branch of BiSeNet, which has narrow channels but deep layers."""
     """The semantic branch of BiSeNet, which has narrow channels but deep layers."""
 
 
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, feature_channels):
         super().__init__()
         super().__init__()
-        C1, C3, C4, C5 = in_channels
+        C1, C3, C4, C5 = feature_channels
 
 
-        self.stem = StemBlock(3, C1)
+        self.stem = StemBlock(in_channels, C1)
 
 
         self.stage3 = nn.Sequential(
         self.stage3 = nn.Sequential(
             GatherAndExpansionLayer2(C1, C3, 6),
             GatherAndExpansionLayer2(C1, C3, 6),

+ 3 - 3
paddlers/models/ppseg/models/bisenetv1.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 174 - 0
paddlers/models/ppseg/models/ccnet.py

@@ -0,0 +1,174 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class CCNet(nn.Layer):
+    """
+    The CCNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zilong Huang, et al. "CCNet: Criss-Cross Attention for Semantic Segmentation"
+    (https://arxiv.org/abs/1811.11721)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (paddle.nn.Layer): Backbone network, currently support Resnet18_vd/Resnet34_vd/Resnet50_vd/Resnet101_vd.
+        backbone_indices (tuple, list, optional): Two values in the tuple indicate the indices of output of backbone. Default: (2, 3).
+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
+        dropout_prob (float, optional): The probability of dropout. Default: 0.0.
+        recurrence (int, optional): The number of recurrent operations. Defautl: 1.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=(2, 3),
+                 enable_auxiliary_loss=True,
+                 dropout_prob=0.0,
+                 recurrence=1,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        self.recurrence = recurrence
+        self.align_corners = align_corners
+
+        self.backbone = backbone
+        self.backbone_indices = backbone_indices
+        backbone_channels = [
+            backbone.feat_channels[i] for i in backbone_indices
+        ]
+
+        if enable_auxiliary_loss:
+            self.aux_head = layers.AuxLayer(
+                backbone_channels[0],
+                512,
+                num_classes,
+                dropout_prob=dropout_prob)
+        self.head = RCCAModule(
+            backbone_channels[1],
+            512,
+            num_classes,
+            dropout_prob=dropout_prob,
+            recurrence=recurrence)
+        self.pretrained = pretrained
+
+    def init_weight(self):
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+    def forward(self, x):
+        feat_list = self.backbone(x)
+        logit_list = []
+        output = self.head(feat_list[self.backbone_indices[-1]])
+        logit_list.append(output)
+        if self.training and self.enable_auxiliary_loss:
+            aux_out = self.aux_head(feat_list[self.backbone_indices[-2]])
+            logit_list.append(aux_out)
+        return [
+            F.interpolate(
+                logit,
+                paddle.shape(x)[2:],
+                mode='bilinear',
+                align_corners=self.align_corners) for logit in logit_list
+        ]
+
+
+class RCCAModule(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 num_classes,
+                 dropout_prob=0.1,
+                 recurrence=1):
+        super().__init__()
+        inter_channels = in_channels // 4
+        self.recurrence = recurrence
+        self.conva = layers.ConvBNLeakyReLU(
+            in_channels, inter_channels, 3, padding=1, bias_attr=False)
+        self.cca = CrissCrossAttention(inter_channels)
+        self.convb = layers.ConvBNLeakyReLU(
+            inter_channels, inter_channels, 3, padding=1, bias_attr=False)
+        self.out = layers.AuxLayer(
+            in_channels + inter_channels,
+            out_channels,
+            num_classes,
+            dropout_prob=dropout_prob)
+
+    def forward(self, x):
+        feat = self.conva(x)
+        for i in range(self.recurrence):
+            feat = self.cca(feat)
+        feat = self.convb(feat)
+        output = self.out(paddle.concat([x, feat], axis=1))
+        return output
+
+
+class CrissCrossAttention(nn.Layer):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.q_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
+        self.k_conv = nn.Conv2D(in_channels, in_channels // 8, kernel_size=1)
+        self.v_conv = nn.Conv2D(in_channels, in_channels, kernel_size=1)
+        self.softmax = nn.Softmax(axis=3)
+        self.gamma = self.create_parameter(
+            shape=(1, ), default_initializer=nn.initializer.Constant(0))
+        self.inf_tensor = paddle.full(shape=(1, ), fill_value=float('inf'))
+
+    def forward(self, x):
+        b, c, h, w = paddle.shape(x)
+        proj_q = self.q_conv(x)
+        proj_q_h = proj_q.transpose([0, 3, 1, 2]).reshape(
+            [b * w, -1, h]).transpose([0, 2, 1])
+        proj_q_w = proj_q.transpose([0, 2, 1, 3]).reshape(
+            [b * h, -1, w]).transpose([0, 2, 1])
+
+        proj_k = self.k_conv(x)
+        proj_k_h = proj_k.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
+        proj_k_w = proj_k.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
+
+        proj_v = self.v_conv(x)
+        proj_v_h = proj_v.transpose([0, 3, 1, 2]).reshape([b * w, -1, h])
+        proj_v_w = proj_v.transpose([0, 2, 1, 3]).reshape([b * h, -1, w])
+
+        energy_h = (paddle.bmm(proj_q_h, proj_k_h) + self.Inf(b, h, w)).reshape(
+            [b, w, h, h]).transpose([0, 2, 1, 3])
+        energy_w = paddle.bmm(proj_q_w, proj_k_w).reshape([b, h, w, w])
+        concate = self.softmax(paddle.concat([energy_h, energy_w], axis=3))
+
+        attn_h = concate[:, :, :, 0:h].transpose([0, 2, 1, 3]).reshape(
+            [b * w, h, h])
+        attn_w = concate[:, :, :, h:h + w].reshape([b * h, w, w])
+        out_h = paddle.bmm(proj_v_h, attn_h.transpose([0, 2, 1])).reshape(
+            [b, w, -1, h]).transpose([0, 2, 3, 1])
+        out_w = paddle.bmm(proj_v_w, attn_w.transpose([0, 2, 1])).reshape(
+            [b, h, -1, w]).transpose([0, 2, 1, 3])
+        return self.gamma * (out_h + out_w) + x
+
+    def Inf(self, B, H, W):
+        return -paddle.tile(
+            paddle.diag(paddle.tile(self.inf_tensor, [H]), 0).unsqueeze(0),
+            [B * W, 1, 1])

+ 3 - 3
paddlers/models/ppseg/models/danet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 403 - 0
paddlers/models/ppseg/models/ddrnet.py

@@ -0,0 +1,403 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models import layers
+from paddleseg.utils import utils
+
+
+class DualResNet(nn.Layer):
+    """
+    The DDRNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Yuanduo Hong, Huihui Pan, Weichao Sun, et al. "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes"
+    (https://arxiv.org/abs/2101.06085)
+
+    Two branches are run side by side: a low-resolution branch
+    (layer3/layer4/layer5) and a high-resolution branch
+    (layer3_/layer4_/layer5_). Features are exchanged between them through
+    the `compression*` layers (1x1 conv followed by bilinear resizing) and the
+    `down*` layers (strided 3x3 convs).
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        in_channels (int, optional): Number of input channels. Default: 3.
+        block_layers (list|tuple, optional): The numbers of blocks in layer1..layer4.
+            Only indices 0-3 are read. Default: (2, 2, 2, 2).
+        planes (int, optional): Base channels in network. Default: 64.
+        spp_planes (int, optional): Branch channels for DAPPM. Default: 128.
+        head_planes (int, optional): Mid channels of segmentation head. Default: 128.
+        enable_auxiliary_loss (bool, optional): Whether to add an auxiliary head on the
+            merged stage-3 feature of the high-resolution branch. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 in_channels=3,
+                 block_layers=(2, 2, 2, 2),
+                 planes=64,
+                 spp_planes=128,
+                 head_planes=128,
+                 enable_auxiliary_loss=False,
+                 pretrained=None):
+        super().__init__()
+        highres_planes = planes * 2
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        # Stem: two stride-2 3x3 convs.
+        self.conv1 = nn.Sequential(
+            layers.ConvBNReLU(
+                in_channels, planes, kernel_size=3, stride=2, padding=1),
+            layers.ConvBNReLU(
+                planes, planes, kernel_size=3, stride=2, padding=1), )
+        self.relu = nn.ReLU()
+        # Low-resolution branch; layer2..layer4 each use stride 2.
+        self.layer1 = self._make_layers(BasicBlock, planes, planes,
+                                        block_layers[0])
+        self.layer2 = self._make_layers(
+            BasicBlock, planes, planes * 2, block_layers[1], stride=2)
+        self.layer3 = self._make_layers(
+            BasicBlock, planes * 2, planes * 4, block_layers[2], stride=2)
+        self.layer4 = self._make_layers(
+            BasicBlock, planes * 4, planes * 8, block_layers[3], stride=2)
+
+        # Low-res -> high-res: 1x1 convs reducing channels to highres_planes.
+        self.compression3 = layers.ConvBN(
+            planes * 4, highres_planes, kernel_size=1, bias_attr=False)
+
+        self.compression4 = layers.ConvBN(
+            planes * 8, highres_planes, kernel_size=1, bias_attr=False)
+
+        # High-res -> low-res: strided 3x3 convs (one for stage 3, two for
+        # stage 4).
+        self.down3 = layers.ConvBN(
+            highres_planes,
+            planes * 4,
+            kernel_size=3,
+            stride=2,
+            bias_attr=False)
+
+        self.down4 = nn.Sequential(
+            layers.ConvBNReLU(
+                highres_planes,
+                planes * 4,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                bias_attr=False),
+            layers.ConvBN(
+                planes * 4,
+                planes * 8,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                bias_attr=False))
+
+        # High-resolution branch (stride 1 throughout).
+        self.layer3_ = self._make_layers(BasicBlock, planes * 2, highres_planes,
+                                         2)
+        self.layer4_ = self._make_layers(BasicBlock, highres_planes,
+                                         highres_planes, 2)
+        self.layer5_ = self._make_layers(Bottleneck, highres_planes,
+                                         highres_planes, 1)
+        self.layer5 = self._make_layers(
+            Bottleneck, planes * 8, planes * 8, 1, stride=2)
+
+        # Bottleneck.expansion is 2, so layer5 emits planes * 16 channels and
+        # layer5_ emits planes * 4 channels (matching the DAPPM output).
+        self.spp = DAPPM(planes * 16, spp_planes, planes * 4)
+        if self.enable_auxiliary_loss:
+            self.aux_head = DDRNetHead(highres_planes, head_planes, num_classes)
+        self.head = DDRNetHead(planes * 4, head_planes, num_classes)
+
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def init_weight(self):
+        """
+        Load `self.pretrained` when it is set; otherwise initialize every
+        `nn.Conv2D` weight with Kaiming-normal and every `nn.BatchNorm2D`
+        with weight 1 and bias 0.
+        """
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+        else:
+            for m in self.sublayers():
+                if isinstance(m, nn.Conv2D):
+                    param_init.kaiming_normal_init(m.weight)
+                elif isinstance(m, nn.BatchNorm2D):
+                    param_init.constant_init(m.weight, value=1)
+                    param_init.constant_init(m.bias, value=0)
+
+    def _make_layers(self, block, inplanes, planes, blocks, stride=1):
+        """
+        Stack `blocks` instances of `block` into an `nn.Sequential`.
+
+        Args:
+            block (type): Block class exposing an `expansion` attribute
+                (BasicBlock or Bottleneck).
+            inplanes (int): Input channels of the first block.
+            planes (int): Base channels of each block; the output has
+                `planes * block.expansion` channels.
+            blocks (int): Number of blocks to stack.
+            stride (int, optional): Stride of the first block. Default: 1.
+
+        Returns:
+            nn.Sequential: The stacked blocks.
+        """
+        out_channels = planes * block.expansion
+        downsample = None
+        # A projection shortcut is needed whenever the first block changes
+        # the spatial size or the channel count.
+        if stride != 1 or inplanes != out_channels:
+            downsample = nn.Sequential(
+                nn.Conv2D(
+                    inplanes,
+                    out_channels,
+                    kernel_size=1,
+                    stride=stride,
+                    bias_attr=False),
+                nn.BatchNorm2D(out_channels), )
+        # Named `stage` rather than `layers` so the imported `layers` module
+        # is not shadowed inside this method.
+        stage = [block(inplanes, planes, stride, downsample)]
+        inplanes = out_channels
+        for i in range(1, blocks):
+            # Only the last block of a multi-block stage skips its final
+            # ReLU; callers apply `self.relu` themselves where needed.
+            stage.append(
+                block(
+                    inplanes, planes, stride=1, no_relu=(i == blocks - 1)))
+        return nn.Sequential(*stage)
+
+    def forward(self, x):
+        """
+        Run the dual-resolution network.
+
+        Args:
+            x (Tensor): Input image batch; unpacked as (n, c, h, w).
+
+        Returns:
+            list[Tensor]: The main logit, followed by the auxiliary logit when
+            `enable_auxiliary_loss` is set, each resized to the input (h, w).
+        """
+        n, c, h, w = paddle.shape(x)
+        # Target size used when resizing low-resolution features for the
+        # high-resolution branch.
+        # NOTE(review): `// 8` floors; for inputs whose sides are not
+        # multiples of 8 this may not match the actual high-res feature
+        # size -- confirm.
+        width_output = w // 8
+        height_output = h // 8
+
+        x = self.conv1(x)
+        stage1_out = self.layer1(x)
+        stage2_out = self.layer2(self.relu(stage1_out))
+        stage3_out = self.layer3(self.relu(stage2_out))
+        stage3_out_dual = self.layer3_(self.relu(stage2_out))
+        # Bilateral fusion after stage 3.
+        x = stage3_out + self.down3(self.relu(stage3_out_dual))
+        stage3_merge = stage3_out_dual + F.interpolate(
+            self.compression3(self.relu(stage3_out)),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        stage4_out = self.layer4(self.relu(x))
+        stage4_out_dual = self.layer4_(self.relu(stage3_merge))
+
+        # Bilateral fusion after stage 4.
+        x = stage4_out + self.down4(self.relu(stage4_out_dual))
+        stage4_merge = stage4_out_dual + F.interpolate(
+            self.compression4(self.relu(stage4_out)),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        stage5_out_dual = self.layer5_(self.relu(stage4_merge))
+        # Low-res branch: layer5 -> DAPPM, then resized to the high-res size.
+        x = F.interpolate(
+            self.spp(self.layer5(self.relu(x))),
+            size=[height_output, width_output],
+            mode='bilinear')
+
+        output = self.head(x + stage5_out_dual)
+        logit_list = []
+        logit_list.append(output)
+
+        if self.enable_auxiliary_loss:
+            aux_out = self.aux_head(stage3_merge)
+            logit_list.append(aux_out)
+        return [
+            F.interpolate(
+                logit, [h, w], mode='bilinear') for logit in logit_list
+        ]
+
+
+class BasicBlock(nn.Layer):
+    """
+    Residual block made of two 3x3 convolutions (ConvBNReLU then ConvBN).
+
+    Args:
+        inplanes (int): Input channels.
+        planes (int): Output channels (`expansion` is 1).
+        stride (int, optional): Stride of the first convolution. Default: 1.
+        downsample (nn.Layer, optional): Layer applied to the input to form the
+            shortcut; the raw input is used when None. Default: None.
+        no_relu (bool, optional): If True, return the sum without the final
+            ReLU. Default: False.
+    """
+
+    expansion = 1
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 no_relu=False):
+        super().__init__()
+        self.conv_bn_relu = layers.ConvBNReLU(
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias_attr=False)
+        self.relu = nn.ReLU()
+        self.conv_bn = layers.ConvBN(
+            planes,
+            planes,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias_attr=False)
+        self.downsample = downsample
+        self.stride = stride
+        self.no_relu = no_relu
+
+    def forward(self, x):
+        # Main path first, then the shortcut.
+        out = self.conv_bn(self.conv_bn_relu(x))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return out if self.no_relu else self.relu(out)
+
+
+class Bottleneck(nn.Layer):
+    """
+    Residual bottleneck block: 1x1 conv, 3x3 conv, then a 1x1 conv that
+    expands the channels by `expansion` (2).
+
+    Args:
+        inplanes (int): Input channels.
+        planes (int): Mid channels; the output has `planes * expansion` channels.
+        stride (int, optional): Stride of the 3x3 convolution. Default: 1.
+        downsample (nn.Layer, optional): Layer applied to the input to form the
+            shortcut; the raw input is used when None. Default: None.
+        no_relu (bool, optional): If True, return the sum without the final
+            ReLU. Default: True.
+    """
+
+    expansion = 2
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 no_relu=True):
+        super().__init__()
+        self.conv_bn_relu1 = layers.ConvBNReLU(
+            inplanes, planes, kernel_size=1, bias_attr=False)
+        self.conv_bn_relu2 = layers.ConvBNReLU(
+            planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias_attr=False)
+        expanded = planes * self.expansion
+        self.conv_bn = layers.ConvBN(
+            planes, expanded, kernel_size=1, bias_attr=False)
+        self.relu = nn.ReLU()
+        self.downsample = downsample
+        self.stride = stride
+        self.no_relu = no_relu
+
+    def forward(self, x):
+        # Main path first, then the shortcut.
+        out = self.conv_bn(self.conv_bn_relu2(self.conv_bn_relu1(x)))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return out if self.no_relu else self.relu(out)
+
+
+class DAPPM(nn.Layer):
+    """
+    Multi-scale pooling module used at the end of the low-resolution branch.
+
+    The input is pooled at several scales (`scale1`..`scale4`, plus the
+    unpooled `scale0`). Each pooled branch is resized back to the input size,
+    added to the previous branch's result and refined by a 3x3 conv
+    (`process1`..`process4`). The five results are concatenated, compressed
+    by a 1x1 conv and added to a 1x1-conv shortcut of the input.
+
+    Args:
+        inplanes (int): Input channels.
+        branch_planes (int): Channels of every branch.
+        outplanes (int): Output channels.
+    """
+
+    @staticmethod
+    def _bn_relu_conv(in_ch, out_ch, kernel_size=1, padding=0, pool=None):
+        # SyncBatchNorm -> ReLU -> bias-free Conv2D, optionally preceded by
+        # a pooling layer.
+        stack = [] if pool is None else [pool]
+        stack.append(layers.SyncBatchNorm(in_ch))
+        stack.append(nn.ReLU())
+        stack.append(
+            nn.Conv2D(
+                in_ch,
+                out_ch,
+                kernel_size=kernel_size,
+                padding=padding,
+                bias_attr=False))
+        return nn.Sequential(*stack)
+
+    def __init__(self, inplanes, branch_planes, outplanes):
+        super().__init__()
+        build = self._bn_relu_conv
+        # Sublayers are registered in the same order and under the same
+        # names as before, so state-dict keys do not change.
+        self.scale1 = build(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=5, stride=2, padding=2))
+        self.scale2 = build(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=9, stride=4, padding=4))
+        self.scale3 = build(
+            inplanes,
+            branch_planes,
+            pool=nn.AvgPool2D(
+                kernel_size=17, stride=8, padding=8))
+        self.scale4 = build(
+            inplanes, branch_planes, pool=nn.AdaptiveAvgPool2D((1, 1)))
+        self.scale0 = build(inplanes, branch_planes)
+        self.process1 = build(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process2 = build(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process3 = build(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.process4 = build(
+            branch_planes, branch_planes, kernel_size=3, padding=1)
+        self.compression = build(branch_planes * 5, outplanes)
+        self.shortcut = build(inplanes, outplanes)
+
+    def forward(self, x):
+        _, _, h, w = paddle.shape(x)
+        feats = [self.scale0(x)]
+        branches = ((self.scale1, self.process1),
+                    (self.scale2, self.process2),
+                    (self.scale3, self.process3),
+                    (self.scale4, self.process4))
+        for scale, process in branches:
+            # Resize the pooled branch to the input size and fuse it with
+            # the previous branch's output before refining.
+            resized = F.interpolate(scale(x), size=[h, w], mode='bilinear')
+            feats.append(process(resized + feats[-1]))
+
+        fused = self.compression(paddle.concat(feats, 1))
+        return fused + self.shortcut(x)
+
+
+class DDRNetHead(nn.Layer):
+    """
+    Segmentation head: BatchNorm -> ReLU -> 3x3 ConvBNReLU -> 1x1 conv,
+    optionally followed by bilinear resizing.
+
+    Args:
+        inplanes (int): Input channels.
+        interplanes (int): Channels after the 3x3 convolution.
+        outplanes (int): Output channels of the final 1x1 convolution.
+        scale_factor (optional): When not None, the output is resized by this
+            factor with bilinear interpolation. Default: None.
+    """
+
+    def __init__(self, inplanes, interplanes, outplanes, scale_factor=None):
+        super().__init__()
+        self.bn1 = nn.BatchNorm2D(inplanes)
+        self.relu = nn.ReLU()
+        self.conv_bn_relu = layers.ConvBNReLU(
+            inplanes, interplanes, kernel_size=3, padding=1, bias_attr=False)
+        self.conv = nn.Conv2D(
+            interplanes, outplanes, kernel_size=1, padding=0, bias_attr=True)
+
+        self.scale_factor = scale_factor
+
+    def forward(self, x):
+        feat = self.conv_bn_relu(self.relu(self.bn1(x)))
+        out = self.conv(feat)
+        if self.scale_factor is None:
+            return out
+        return F.interpolate(
+            out, scale_factor=self.scale_factor, mode='bilinear')
+
+
+@manager.MODELS.add_component
+def DDRNet_23(**kwargs):
+    """
+    Build a `DualResNet` with the DDRNet-23 settings.
+
+    The settings below are fixed; every other `DualResNet` argument
+    (e.g. `num_classes`) is forwarded through `kwargs`.
+    """
+    preset = {
+        'block_layers': [2, 2, 2, 2],
+        'planes': 64,
+        'spp_planes': 128,
+        'head_planes': 128,
+    }
+    return DualResNet(**preset, **kwargs)

+ 5 - 5
paddlers/models/ppseg/models/decoupled_segnet.py

@@ -18,11 +18,11 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.models.backbones import resnet_vd
-from paddlers.models.ppseg.models import deeplab
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.models.backbones import resnet_vd
+from paddleseg.models import deeplab
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/deeplab.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 __all__ = ['DeepLabV3P', 'DeepLabV3']
 __all__ = ['DeepLabV3P', 'DeepLabV3']
 
 

+ 3 - 3
paddlers/models/ppseg/models/dmnet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/dnlnet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 6 - 4
paddlers/models/ppseg/models/emanet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component
@@ -209,7 +209,9 @@ class EMAU(nn.Layer):
             mu = F.normalize(mu, axis=1, p=2)
             mu = F.normalize(mu, axis=1, p=2)
             mu = self.mu * (1 - self.momentum) + mu * self.momentum
             mu = self.mu * (1 - self.momentum) + mu * self.momentum
             if paddle.distributed.get_world_size() > 1:
             if paddle.distributed.get_world_size() > 1:
-                mu = paddle.distributed.all_reduce(mu)
+                out = paddle.distributed.all_reduce(mu)
+                if out is not None:
+                    mu = out
                 mu /= paddle.distributed.get_world_size()
                 mu /= paddle.distributed.get_world_size()
             self.mu = mu
             self.mu = mu
 
 

+ 3 - 3
paddlers/models/ppseg/models/encnet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 6 - 4
paddlers/models/ppseg/models/enet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg import utils
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager, param_init
+from paddleseg import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager, param_init
 
 
 __all__ = ['ENet']
 __all__ = ['ENet']
 
 
@@ -34,6 +34,7 @@ class ENet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         pretrained (str, optional): The path or url of pretrained model. Default: None.
         encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
         encoder_relu (bool, optional): When ``True`` ReLU is used as the activation
             function; otherwise, PReLU is used. Default: False.
             function; otherwise, PReLU is used. Default: False.
@@ -43,13 +44,14 @@ class ENet(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  pretrained=None,
                  pretrained=None,
                  encoder_relu=False,
                  encoder_relu=False,
                  decoder_relu=True):
                  decoder_relu=True):
         super(ENet, self).__init__()
         super(ENet, self).__init__()
 
 
         self.numclasses = num_classes
         self.numclasses = num_classes
-        self.initial_block = InitialBlock(3, 16, relu=encoder_relu)
+        self.initial_block = InitialBlock(in_channels, 16, relu=encoder_relu)
 
 
         self.downsample1_0 = DownsamplingBottleneck(
         self.downsample1_0 = DownsamplingBottleneck(
             16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)
             16, 64, return_indices=True, dropout_prob=0.01, relu=encoder_relu)

+ 3 - 3
paddlers/models/ppseg/models/espnet.py

@@ -18,9 +18,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg import utils
-from paddlers.models.ppseg.cvlibs import manager, param_init
-from paddlers.models.ppseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models import layers
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/espnetv1.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 16 - 6
paddlers/models/ppseg/models/fast_scnn.py

@@ -16,9 +16,9 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 import paddle
 import paddle
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 __all__ = ['FastSCNN']
 __all__ = ['FastSCNN']
 
 
@@ -34,6 +34,7 @@ class FastSCNN(nn.Layer):
     (https://arxiv.org/pdf/1902.04502.pdf).
     (https://arxiv.org/pdf/1902.04502.pdf).
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
         enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
             If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
             If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
         align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
@@ -43,13 +44,15 @@ class FastSCNN(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  enable_auxiliary_loss=True,
                  enable_auxiliary_loss=True,
                  align_corners=False,
                  align_corners=False,
                  pretrained=None):
                  pretrained=None):
 
 
         super().__init__()
         super().__init__()
 
 
-        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
+        self.learning_to_downsample = LearningToDownsample(in_channels, 32, 48,
+                                                           64)
         self.global_feature_extractor = GlobalFeatureExtractor(
         self.global_feature_extractor = GlobalFeatureExtractor(
             in_channels=64,
             in_channels=64,
             block_channels=[64, 96, 128],
             block_channels=[64, 96, 128],
@@ -108,11 +111,18 @@ class LearningToDownsample(nn.Layer):
         out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
         out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64.
     """
     """
 
 
-    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
+    def __init__(self,
+                 in_channels=3,
+                 dw_channels1=32,
+                 dw_channels2=48,
+                 out_channels=64):
         super(LearningToDownsample, self).__init__()
         super(LearningToDownsample, self).__init__()
 
 
         self.conv_bn_relu = layers.ConvBNReLU(
         self.conv_bn_relu = layers.ConvBNReLU(
-            in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
+            in_channels=in_channels,
+            out_channels=dw_channels1,
+            kernel_size=3,
+            stride=2)
         self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
         self.dsconv_bn_relu1 = layers.SeparableConvBNReLU(
             in_channels=dw_channels1,
             in_channels=dw_channels1,
             out_channels=dw_channels2,
             out_channels=dw_channels2,

+ 3 - 3
paddlers/models/ppseg/models/fastfcn.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/fcn.py

@@ -16,9 +16,9 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 import paddle
 import paddle
-from paddlers.models.ppseg import utils
-from paddlers.models.ppseg.cvlibs import manager, param_init
-from paddlers.models.ppseg.models import layers
+from paddleseg import utils
+from paddleseg.cvlibs import manager, param_init
+from paddleseg.models import layers
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/gcnet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 4 - 4
paddlers/models/ppseg/models/ginet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 from paddle.nn import functional as F
 from paddle.nn import functional as F
 
 
-from paddlers.models.ppseg.utils import utils
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component
@@ -92,7 +92,7 @@ class GINet(nn.Layer):
 
 
         return [
         return [
             F.interpolate(
             F.interpolate(
-                logit, (h, w),
+                logit, [h, w],
                 mode='bilinear',
                 mode='bilinear',
                 align_corners=self.align_corners) for logit in logit_list
                 align_corners=self.align_corners) for logit in logit_list
         ]
         ]

+ 198 - 0
paddlers/models/ppseg/models/glore.py

@@ -0,0 +1,198 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class GloRe(nn.Layer):
+    """
+    GloRe segmentation network implemented with PaddlePaddle.
+
+    Reference:
+       Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
+       (https://arxiv.org/pdf/1811.12814.pdf)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (paddle.nn.Layer): Backbone network, currently support Resnet50/101.
+            It must expose `feat_channels` and return a list of feature maps.
+        backbone_indices (tuple, optional): Two indices into the backbone outputs. The first
+            selects the feature used by the auxiliary layer, the second the feature fed to
+            the GloRe unit. Default: (2, 3).
+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
+        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=(2, 3),
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+
+        self.backbone = backbone
+        # Channel counts of the two backbone stages consumed by the head.
+        selected_channels = [
+            backbone.feat_channels[idx] for idx in backbone_indices
+        ]
+
+        self.head = GloReHead(
+            num_classes, backbone_indices, selected_channels, gru_channels,
+            gru_num_state, gru_num_node, enable_auxiliary_loss)
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """Return the head logits, each bilinearly resized to the spatial size of `x`."""
+        logits = self.head(self.backbone(x))
+        resized = []
+        for logit in logits:
+            resized.append(
+                F.interpolate(
+                    logit,
+                    paddle.shape(x)[2:],
+                    mode='bilinear',
+                    align_corners=self.align_corners))
+        return resized
+
+    def init_weight(self):
+        """Load the entire model from `self.pretrained` when one was given."""
+        if self.pretrained is None:
+            return
+        utils.load_entire_model(self, self.pretrained)
+
+
+class GloReHead(nn.Layer):
+    """
+    Head of GloRe: a 1x1 ConvBNReLU, the GloRe unit, dropout and a 1x1
+    classifier on the high level feature, plus an auxiliary layer on the low
+    level feature.
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone_indices (tuple): Two indices into the backbone feature list;
+            index 0 selects the auxiliary feature, index 1 the main feature.
+        backbone_channels (list): Channel counts of the two selected features,
+            in the same order as `backbone_indices`.
+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
+        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
+        enable_auxiliary_loss (bool, optional): Whether `forward` also returns
+            the auxiliary logit. Default: True.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone_indices,
+                 backbone_channels,
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True):
+        super().__init__()
+
+        in_channels = backbone_channels[1]
+        self.conv_bn_relu = layers.ConvBNReLU(
+            in_channels, gru_channels, 1, bias_attr=False)
+        self.gru_module = GruModule(
+            num_input=gru_channels,
+            num_state=gru_num_state,
+            num_node=gru_num_node)
+
+        self.dropout = nn.Dropout(0.1)
+        # GruModule returns `input + Y_extend`, i.e. a tensor with
+        # `gru_channels` channels, so the classifier must take that many
+        # input channels (previously hard-coded to 512, which only matched
+        # the default configuration).
+        self.classifier = nn.Conv2D(gru_channels, num_classes, kernel_size=1)
+        # The auxiliary layer is always built, even when the auxiliary loss
+        # is disabled, so the set of parameters does not depend on the flag.
+        self.auxlayer = layers.AuxLayer(
+            in_channels=backbone_channels[0],
+            inter_channels=backbone_channels[0] // 4,
+            out_channels=num_classes)
+
+        self.backbone_indices = backbone_indices
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+    def forward(self, feat_list):
+        """
+        Args:
+            feat_list (list): Feature maps returned by the backbone.
+
+        Returns:
+            list: `[logit]`, or `[logit, auxiliary_logit]` when the auxiliary
+                loss is enabled.
+        """
+        logit_list = []
+        x = feat_list[self.backbone_indices[1]]
+
+        feature = self.conv_bn_relu(x)
+        gru_output = self.gru_module(feature)
+        output = self.dropout(gru_output)
+        logit = self.classifier(output)
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            low_level_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.auxlayer(low_level_feat)
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+
+class GCN(nn.Layer):
+    """
+    Graph convolution applied to the node states of the GloRe unit.
+
+    Args:
+        num_state (int): Size of axis 1 of the input; channel count of `conv2`.
+        num_node (int): Size of axis 2 of the input; channel count of `conv1`.
+        bias (bool, optional): Forwarded as `bias_attr` of `conv2`. Default: False.
+    """
+
+    def __init__(self, num_state, num_node, bias=False):
+        super().__init__()
+        self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv1D(
+            num_state, num_state, kernel_size=1, bias_attr=bias)
+
+    def forward(self, x):
+        # Put the node axis in the channel position so conv1 mixes nodes,
+        # then restore the original layout.
+        node_major = paddle.transpose(x, perm=(0, 2, 1))
+        mixed = paddle.transpose(self.conv1(node_major), perm=(0, 2, 1))
+        # Residual connection followed by conv2 over the state axis.
+        return self.relu(self.conv2(mixed + x))
+
+
+class GruModule(nn.Layer):
+    """
+    GloRe unit: project pixel features onto graph nodes, run a GCN over the
+    nodes, project back to pixels and add the result to the input.
+
+    Args:
+        num_input (int, optional): Channels of the input feature map. Default: 512.
+        num_state (int, optional): Channels of each node state. Default: 128.
+        num_node (int, optional): Number of graph nodes. Default: 64.
+        normalize (bool, optional): Whether to scale the node states by
+            1 / (H * W). Default: False.
+    """
+
+    def __init__(self,
+                 num_input=512,
+                 num_state=128,
+                 num_node=64,
+                 normalize=False):
+        super().__init__()
+        self.normalize = normalize
+        self.num_state = num_state
+        self.num_node = num_node
+        self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
+        self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
+        self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
+        self.extend_dim = nn.Conv2D(
+            self.num_state, num_input, kernel_size=1, bias_attr=False)
+        self.extend_bn = layers.SyncBatchNorm(num_input, epsilon=1e-4)
+
+    def forward(self, input):
+        batch, _, height, width = input.shape
+        num_pixels = height * width
+
+        # (B, num_state, H*W)
+        state_feat = paddle.reshape(
+            self.reduction_dim(input),
+            shape=[batch, self.num_state, num_pixels])
+        # (B, num_node, H*W); reused below to project nodes back to pixels
+        projection = paddle.reshape(
+            self.projection_mat(input),
+            shape=[batch, self.num_node, num_pixels])
+
+        # (B, num_state, num_node)
+        node_state = paddle.matmul(
+            state_feat, paddle.transpose(
+                projection, perm=[0, 2, 1]))
+        if self.normalize:
+            node_state = node_state * (1. / state_feat.shape[2])
+
+        # (B, num_state, num_node)
+        reasoned = self.gcn(node_state)
+        # (B, num_state, H*W) -> (B, num_state, H, W)
+        pixel_feat = paddle.matmul(reasoned, projection)
+        pixel_feat = paddle.reshape(
+            pixel_feat, shape=[batch, self.num_state, height, width])
+
+        # Restore the input channel count, normalize, and add the residual.
+        residual = self.extend_bn(self.extend_dim(pixel_feat))
+        return input + residual

+ 5 - 5
paddlers/models/ppseg/models/gscnn.py

@@ -18,11 +18,11 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.models.backbones import resnet_vd
-from paddlers.models.ppseg.models import deeplab
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.models.backbones import resnet_vd
+from paddleseg.models import deeplab
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 6 - 4
paddlers/models/ppseg/models/hardnet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component
@@ -31,6 +31,7 @@ class HarDNet(nn.Layer):
 
 
     Args:
     Args:
         num_classes (int): The unique number of target classes.
         num_classes (int): The unique number of target classes.
+        in_channels (int, optional): The channels of input image. Default: 3.
         stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
         stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
         ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
         ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
         grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
         grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
@@ -43,6 +44,7 @@ class HarDNet(nn.Layer):
 
 
     def __init__(self,
     def __init__(self,
                  num_classes,
                  num_classes,
+                 in_channels=3,
                  stem_channels=(16, 24, 32, 48),
                  stem_channels=(16, 24, 32, 48),
                  ch_list=(64, 96, 160, 224, 320),
                  ch_list=(64, 96, 160, 224, 320),
                  grmul=1.7,
                  grmul=1.7,
@@ -60,7 +62,7 @@ class HarDNet(nn.Layer):
 
 
         self.stem = nn.Sequential(
         self.stem = nn.Sequential(
             layers.ConvBNReLU(
             layers.ConvBNReLU(
-                3, stem_channels[0], kernel_size=3, bias_attr=False),
+                in_channels, stem_channels[0], kernel_size=3, bias_attr=False),
             layers.ConvBNReLU(
             layers.ConvBNReLU(
                 stem_channels[0],
                 stem_channels[0],
                 stem_channels[1],
                 stem_channels[1],

+ 3 - 3
paddlers/models/ppseg/models/hrnet_contrast.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.utils import utils
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 3 - 3
paddlers/models/ppseg/models/isanet.py

@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
 
 
 
 
 @manager.MODELS.add_component
 @manager.MODELS.add_component

+ 2 - 1
paddlers/models/ppseg/models/layers/__init__.py

@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU
+from .layer_libs import ConvBNReLU, ConvBN, SeparableConvBNReLU, DepthwiseConvBN, AuxLayer, SyncBatchNorm, JPU, ConvBNPReLU, ConvBNAct, ConvBNLeakyReLU
 from .activation import Activation
 from .activation import Activation
 from .pyramid_pool import ASPPModule, PPModule
 from .pyramid_pool import ASPPModule, PPModule
 from .attention import AttentionBlock
 from .attention import AttentionBlock
 from .nonlocal2d import NonLocal2D
 from .nonlocal2d import NonLocal2D
 from .wrap_functions import *
 from .wrap_functions import *
+from .tensor_fusion import UAFM_SpAtten, UAFM_SpAtten_S, UAFM_ChAtten, UAFM_ChAtten_S, UAFM, UAFMMobile, UAFMMobile_SpAtten

+ 1 - 1
paddlers/models/ppseg/models/layers/activation.py

@@ -33,7 +33,7 @@ class Activation(nn.Layer):
 
 
     Examples:
     Examples:
 
 
-        from paddlers.models.ppseg.models.common.activation import Activation
+        from paddleseg.models.common.activation import Activation
 
 
         relu = Activation("relu")
         relu = Activation("relu")
         print(relu)
         print(relu)

+ 127 - 1
paddlers/models/ppseg/models/layers/attention.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
+from paddleseg.models import layers
 
 
 
 
 class AttentionBlock(nn.Layer):
 class AttentionBlock(nn.Layer):
@@ -144,3 +144,129 @@ class AttentionBlock(nn.Layer):
         if self.out_project is not None:
         if self.out_project is not None:
             context = self.out_project(context)
             context = self.out_project(context)
         return context
         return context
+
+
+class DualAttentionModule(nn.Layer):
+    """
+    Dual attention module: a channel attention branch and a position
+    attention branch whose outputs are summed and fused by a 3x3 conv.
+
+    Args:
+        in_channels (int): The number of input channels.
+        out_channels (int): The number of output channels.
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        # Both branches work on a quarter of the input channels.
+        reduced = in_channels // 4
+
+        self.channel_conv = layers.ConvBNReLU(in_channels, reduced, 1)
+        self.position_conv = layers.ConvBNReLU(in_channels, reduced, 1)
+        self.pam = PAM(reduced)
+        self.cam = CAM(reduced)
+        self.conv1 = layers.ConvBNReLU(reduced, reduced, 3)
+        self.conv2 = layers.ConvBNReLU(reduced, reduced, 3)
+        self.conv3 = layers.ConvBNReLU(reduced, out_channels, 3)
+
+    def forward(self, feats):
+        # Channel attention branch.
+        cam_branch = self.conv1(self.cam(self.channel_conv(feats)))
+        # Position attention branch.
+        pam_branch = self.conv2(self.pam(self.position_conv(feats)))
+        return self.conv3(pam_branch + cam_branch)
+
+
+class PAM(nn.Layer):
+    """
+    Position attention module.
+
+    Computes a softmax similarity between all spatial positions and adds the
+    attended values, scaled by a learnable `gamma` (initialized to 0), to the
+    input.
+
+    Args:
+        in_channels (int): The number of input channels.
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.mid_channels = in_channels // 8
+        self.in_channels = in_channels
+
+        self.query_conv = nn.Conv2D(in_channels, self.mid_channels, 1, 1)
+        self.key_conv = nn.Conv2D(in_channels, self.mid_channels, 1, 1)
+        self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1)
+
+        self.gamma = self.create_parameter(
+            shape=[1],
+            dtype='float32',
+            default_initializer=nn.initializer.Constant(0))
+
+    def forward(self, x):
+        dyn_shape = paddle.shape(x)
+
+        # query: n, h * w, c1
+        query = paddle.transpose(
+            paddle.reshape(self.query_conv(x), (0, self.mid_channels, -1)),
+            (0, 2, 1))
+
+        # key: n, c1, h * w
+        key = paddle.reshape(self.key_conv(x), (0, self.mid_channels, -1))
+
+        # attention: n, h * w, h * w (softmax over the last axis)
+        attention = F.softmax(paddle.bmm(query, key), axis=-1)
+
+        # value: n, c2, h * w
+        value = paddle.reshape(self.value_conv(x), (0, self.in_channels, -1))
+
+        # feat: from (n, c2, h * w) -> (n, c2, h, w)
+        feat = paddle.bmm(value, paddle.transpose(attention, (0, 2, 1)))
+        feat = paddle.reshape(
+            feat, (0, self.in_channels, dyn_shape[2], dyn_shape[3]))
+
+        return self.gamma * feat + x
+
+
+class CAM(nn.Layer):
+    """
+    Channel attention module.
+
+    Computes a channel-to-channel similarity and adds the attended feature,
+    scaled by a learnable `gamma` (initialized to 0), to the input.
+
+    Args:
+        channels (int): The number of input channels.
+    """
+
+    def __init__(self, channels):
+        super().__init__()
+
+        self.channels = channels
+        self.gamma = self.create_parameter(
+            shape=[1],
+            dtype='float32',
+            default_initializer=nn.initializer.Constant(0))
+
+    def forward(self, x):
+        dyn_shape = paddle.shape(x)
+        # flat: n, c, h * w -- serves as query and value; its transpose is the key
+        flat = paddle.reshape(x, (0, self.channels, -1))
+        # key: n, h * w, c
+        key = paddle.transpose(flat, (0, 2, 1))
+
+        # sim: n, c, c
+        sim = paddle.bmm(flat, key)
+        # The danet author claims that this can avoid gradient divergence
+        row_max = paddle.max(sim, axis=-1, keepdim=True)
+        sim = row_max.tile([1, 1, self.channels]) - sim
+        sim = F.softmax(sim, axis=-1)
+
+        # feat: from (n, c, h * w) to (n, c, h, w)
+        feat = paddle.reshape(
+            paddle.bmm(sim, flat),
+            (0, self.channels, dyn_shape[2], dyn_shape[3]))
+
+        return self.gamma * feat + x

+ 58 - 1
paddlers/models/ppseg/models/layers/layer_libs.py

@@ -17,7 +17,7 @@ import os
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
-from paddlers.models.ppseg.models import layers
+from paddleseg.models import layers
 
 
 
 
 def SyncBatchNorm(*args, **kwargs):
 def SyncBatchNorm(*args, **kwargs):
@@ -56,6 +56,37 @@ class ConvBNReLU(nn.Layer):
         return x
         return x
 
 
 
 
+class ConvBNAct(nn.Layer):
+    """
+    Conv2D followed by batch norm and an optional activation.
+
+    Args:
+        in_channels (int): Input channels of the conv.
+        out_channels (int): Output channels of the conv and the batch norm.
+        kernel_size (int|tuple): Kernel size of the conv.
+        padding (str|int, optional): Padding of the conv. Default: 'same'.
+        act_type (str, optional): Name passed to `layers.Activation`; no
+            activation is applied when it is None. Default: None.
+        **kwargs: Extra arguments forwarded to `nn.Conv2D`. `data_format`, if
+            given, is also used for the batch norm (default 'NCHW').
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 padding='same',
+                 act_type=None,
+                 **kwargs):
+        super().__init__()
+
+        self._conv = nn.Conv2D(
+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
+        self._batch_norm = SyncBatchNorm(
+            out_channels, data_format=kwargs.get('data_format', 'NCHW'))
+
+        self._act_type = act_type
+        if act_type is not None:
+            self._act = layers.Activation(act_type)
+
+    def forward(self, x):
+        x = self._batch_norm(self._conv(x))
+        if self._act_type is None:
+            return x
+        return self._act(x)
+
+
 class ConvBN(nn.Layer):
 class ConvBN(nn.Layer):
     def __init__(self,
     def __init__(self,
                  in_channels,
                  in_channels,
@@ -293,3 +324,29 @@ class ConvBNPReLU(nn.Layer):
         x = self._batch_norm(x)
         x = self._batch_norm(x)
         x = self._prelu(x)
         x = self._prelu(x)
         return x
         return x
+
+
+class ConvBNLeakyReLU(nn.Layer):
+    """
+    Conv2D followed by batch norm and a "leakyrelu" activation.
+
+    Args:
+        in_channels (int): Input channels of the conv.
+        out_channels (int): Output channels of the conv and the batch norm.
+        kernel_size (int|tuple): Kernel size of the conv.
+        padding (str|int, optional): Padding of the conv. Default: 'same'.
+        **kwargs: Extra arguments forwarded to `nn.Conv2D`. `data_format`, if
+            given, is also used for the batch norm (default 'NCHW').
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 padding='same',
+                 **kwargs):
+        super().__init__()
+
+        self._conv = nn.Conv2D(
+            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
+        self._batch_norm = SyncBatchNorm(
+            out_channels, data_format=kwargs.get('data_format', 'NCHW'))
+        self._relu = layers.Activation("leakyrelu")
+
+    def forward(self, x):
+        return self._relu(self._batch_norm(self._conv(x)))

+ 1 - 1
paddlers/models/ppseg/models/layers/nonlocal2d.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
+from paddleseg.models import layers
 
 
 
 
 class NonLocal2D(nn.Layer):
 class NonLocal2D(nn.Layer):

+ 1 - 1
paddlers/models/ppseg/models/layers/pyramid_pool.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 from paddle import nn
 from paddle import nn
 
 
-from paddlers.models.ppseg.models import layers
+from paddleseg.models import layers
 
 
 
 
 class ASPPModule(nn.Layer):
 class ASPPModule(nn.Layer):

+ 285 - 0
paddlers/models/ppseg/models/layers/tensor_fusion.py

@@ -0,0 +1,285 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Constant
+from paddleseg.models import layers
+from paddleseg.models.layers import tensor_fusion_helper as helper
+
+
+class UAFM(nn.Layer):
+    """
+    The base of Unified Attention Fusion Module.
+
+    The low level feature is convolved to `y_ch` channels, the high level
+    feature is resized to the same spatial size, and `fuse` combines them
+    (here: element-wise sum followed by an output conv).
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__()
+
+        self.conv_x = layers.ConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=ksize // 2, bias_attr=False)
+        self.conv_out = layers.ConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+        self.resize_mode = resize_mode
+
+    def check(self, x, y):
+        """Assert both inputs are 4-D and x is spatially at least as large as y."""
+        assert x.ndim == 4 and y.ndim == 4
+        x_size = x.shape[2:]
+        y_size = y.shape[2:]
+        assert x_size[0] >= y_size[0] and x_size[1] >= y_size[1]
+
+    def prepare(self, x, y):
+        """Return the convolved x and y resized to the size of the convolved x."""
+        low = self.prepare_x(x, y)
+        high = self.prepare_y(low, y)
+        return low, high
+
+    def prepare_x(self, x, y):
+        return self.conv_x(x)
+
+    def prepare_y(self, x, y):
+        return F.interpolate(y, paddle.shape(x)[2:], mode=self.resize_mode)
+
+    def fuse(self, x, y):
+        return self.conv_out(x + y)
+
+    def forward(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        self.check(x, y)
+        low, high = self.prepare(x, y)
+        return self.fuse(low, high)
+
+
+class UAFM_ChAtten(UAFM):
+    """
+    The UAFM with channel attention, which uses mean and max values.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        half_ch = y_ch // 2
+        # Attention branch: 4 * y_ch pooled channels -> y_ch attention logits.
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNAct(
+                4 * y_ch,
+                half_ch,
+                kernel_size=1,
+                bias_attr=False,
+                act_type="leakyrelu"),
+            layers.ConvBN(
+                half_ch, y_ch, kernel_size=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        pooled = helper.avg_max_reduce_hw([x, y], self.training)
+        atten = F.sigmoid(self.conv_xy_atten(pooled))
+
+        # Attention-weighted blend of the two features.
+        return self.conv_out(x * atten + y * (1 - atten))
+
+
+class UAFM_ChAtten_S(UAFM):
+    """
+    The UAFM with channel attention, which uses mean values.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        half_ch = y_ch // 2
+        # Attention branch: 2 * y_ch pooled channels -> y_ch attention logits.
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNAct(
+                2 * y_ch,
+                half_ch,
+                kernel_size=1,
+                bias_attr=False,
+                act_type="leakyrelu"),
+            layers.ConvBN(
+                half_ch, y_ch, kernel_size=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        pooled = helper.avg_reduce_hw([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(pooled))
+
+        # Attention-weighted blend of the two features.
+        return self.conv_out(x * atten + y * (1 - atten))
+
+
+class UAFM_SpAtten(UAFM):
+    """
+    The UAFM with spatial attention, which uses mean and max values.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        # Attention branch: 4 reduced maps -> 1 attention map.
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                4, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+        # Constant 1 stored as a parameter; gradients are stopped, so it is
+        # not updated by training.
+        self._scale = self.create_parameter(
+            shape=[1],
+            attr=ParamAttr(initializer=Constant(value=1.)),
+            dtype="float32")
+        self._scale.stop_gradient = True
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        reduced = helper.avg_max_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(reduced))
+
+        # Attention-weighted blend; `_scale` plays the role of the constant 1.
+        return self.conv_out(x * atten + y * (self._scale - atten))
+
+
+class UAFM_SpAtten_S(UAFM):
+    """
+    The UAFM with spatial attention, which uses mean values.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        # Attention branch: 2 reduced maps -> 1 attention map.
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                2, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        reduced = helper.avg_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(reduced))
+
+        # Attention-weighted blend of the two features.
+        return self.conv_out(x * atten + y * (1 - atten))
+
+
+class UAFMMobile(UAFM):
+    """
+    Unified Attention Fusion Module for mobile.
+
+    Same as `UAFM`, but `conv_x` and `conv_out` are replaced by
+    `SeparableConvBNReLU` layers.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        # Overwrite the layers created by the base class.
+        x_padding = ksize // 2
+        self.conv_x = layers.SeparableConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=x_padding, bias_attr=False)
+        self.conv_out = layers.SeparableConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+
+
+class UAFMMobile_SpAtten(UAFM):
+    """
+    Unified Attention Fusion Module with spatial attention for mobile.
+
+    `conv_x` and `conv_out` of the base class are replaced by
+    `SeparableConvBNReLU` layers.
+
+    Args:
+        x_ch (int): The channel of x tensor, which is the low level feature.
+        y_ch (int): The channel of y tensor, which is the high level feature.
+        out_ch (int): The channel of output tensor.
+        ksize (int, optional): The kernel size of the conv for x tensor. Default: 3.
+        resize_mode (str, optional): The resize mode used when upsampling y tensor. Default: bilinear.
+    """
+
+    def __init__(self, x_ch, y_ch, out_ch, ksize=3, resize_mode='bilinear'):
+        super().__init__(x_ch, y_ch, out_ch, ksize, resize_mode)
+
+        # Overwrite the layers created by the base class.
+        x_padding = ksize // 2
+        self.conv_x = layers.SeparableConvBNReLU(
+            x_ch, y_ch, kernel_size=ksize, padding=x_padding, bias_attr=False)
+        self.conv_out = layers.SeparableConvBNReLU(
+            y_ch, out_ch, kernel_size=3, padding=1, bias_attr=False)
+
+        # Attention branch: 4 reduced maps -> 1 attention map.
+        self.conv_xy_atten = nn.Sequential(
+            layers.ConvBNReLU(
+                4, 2, kernel_size=3, padding=1, bias_attr=False),
+            layers.ConvBN(
+                2, 1, kernel_size=3, padding=1, bias_attr=False))
+
+    def fuse(self, x, y):
+        """
+        Args:
+            x (Tensor): The low level feature.
+            y (Tensor): The high level feature.
+        """
+        reduced = helper.avg_max_reduce_channel([x, y])
+        atten = F.sigmoid(self.conv_xy_atten(reduced))
+
+        # Attention-weighted blend of the two features.
+        return self.conv_out(x * atten + y * (1 - atten))

+ 133 - 0
paddlers/models/ppseg/models/layers/tensor_fusion_helper.py

@@ -0,0 +1,133 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+def avg_reduce_hw(x):
+    """
+    Reduce the spatial dims (hw) of the input(s) by adaptive average pooling.
+
+    Args:
+        x (Tensor|list|tuple): A single tensor, or a list/tuple of tensors.
+
+    Returns:
+        Tensor: The pooled tensor. For several inputs the pooled results are
+            concatenated on axis 1: cat([avg_pool_0, avg_pool_1, ...]).
+    """
+    if isinstance(x, (list, tuple)):
+        if len(x) == 1:
+            return F.adaptive_avg_pool2d(x[0], 1)
+        pooled = [F.adaptive_avg_pool2d(xi, 1) for xi in x]
+        return paddle.concat(pooled, axis=1)
+    return F.adaptive_avg_pool2d(x, 1)
+
+
+def avg_max_reduce_hw_helper(x, is_training, use_concat=True):
+    """
+    Reduce the spatial dims (hw) of a single tensor by both avg and max.
+
+    Args:
+        x (Tensor): The input tensor; lists and tuples are rejected.
+        is_training (bool): Selects which max-reduction op is used.
+        use_concat (bool, optional): If True, return the avg and max results
+            concatenated on axis 1; otherwise return them as a two-item list.
+            Default: True.
+
+    Returns:
+        Tensor|list: cat([avg_pool, max_pool]) or [avg_pool, max_pool].
+    """
+    assert not isinstance(x, (list, tuple))
+    pooled_avg = F.adaptive_avg_pool2d(x, 1)
+    # TODO(pjc): when axis=[2, 3], the paddle.max api has bug for training.
+    # Adaptive max pooling is therefore used on the training path.
+    if not is_training:
+        pooled_max = paddle.max(x, axis=[2, 3], keepdim=True)
+    else:
+        pooled_max = F.adaptive_max_pool2d(x, 1)
+
+    if not use_concat:
+        return [pooled_avg, pooled_max]
+    return paddle.concat([pooled_avg, pooled_max], axis=1)
+
+
+def avg_max_reduce_hw(x, is_training):
+    """
+    Reduce the spatial dims (hw) of the input(s) by both avg and max.
+
+    Args:
+        x (Tensor|list|tuple): A single tensor, or a list/tuple of tensors.
+        is_training (bool): Forwarded to avg_max_reduce_hw_helper.
+
+    Returns:
+        Tensor: For a single input, cat([avg_pool, max_pool]). For several
+            inputs, all averages come first and all maxima after:
+            cat([avg_pool_0, avg_pool_1, ..., max_pool_0, max_pool_1, ...]).
+    """
+    if not isinstance(x, (list, tuple)):
+        return avg_max_reduce_hw_helper(x, is_training)
+    if len(x) == 1:
+        return avg_max_reduce_hw_helper(x[0], is_training)
+
+    res_avg = []
+    res_max = []
+    for xi in x:
+        # Named avg_pool/max_pool so that the builtin `max` is not shadowed.
+        avg_pool, max_pool = avg_max_reduce_hw_helper(xi, is_training, False)
+        res_avg.append(avg_pool)
+        res_max.append(max_pool)
+    return paddle.concat(res_avg + res_max, axis=1)
+
+
+def avg_reduce_channel(x):
+    """
+    Reduce the channel dim (axis 1) of the input(s) by mean, keeping the dim.
+
+    Args:
+        x (Tensor|list|tuple): A single tensor, or a list/tuple of tensors.
+
+    Returns:
+        Tensor: The reduced tensor. For several inputs the results are
+            concatenated on axis 1: cat([avg_ch_0, avg_ch_1, ...]).
+    """
+    if isinstance(x, (list, tuple)):
+        if len(x) == 1:
+            return paddle.mean(x[0], axis=1, keepdim=True)
+        means = [paddle.mean(xi, axis=1, keepdim=True) for xi in x]
+        return paddle.concat(means, axis=1)
+    return paddle.mean(x, axis=1, keepdim=True)
+
+
+def max_reduce_channel(x):
+    """
+    Reduce the channel dim (axis 1) of the input(s) by max, keeping the dim.
+
+    Args:
+        x (Tensor|list|tuple): A single tensor, or a list/tuple of tensors.
+
+    Returns:
+        Tensor: The reduced tensor. For several inputs the results are
+            concatenated on axis 1: cat([max_ch_0, max_ch_1, ...]).
+    """
+    if isinstance(x, (list, tuple)):
+        if len(x) == 1:
+            return paddle.max(x[0], axis=1, keepdim=True)
+        maxima = [paddle.max(xi, axis=1, keepdim=True) for xi in x]
+        return paddle.concat(maxima, axis=1)
+    return paddle.max(x, axis=1, keepdim=True)
+
+
+def avg_max_reduce_channel_helper(x, use_concat=True):
+    """
+    Reduce the channel dim (axis 1) of a single tensor by both mean and max.
+
+    Args:
+        x (Tensor): The input tensor; lists and tuples are rejected.
+        use_concat (bool, optional): If True, return the mean and max results
+            concatenated on axis 1; otherwise return them as a two-item list.
+            Default: True.
+
+    Returns:
+        Tensor|list: cat([mean_value, max_value]) or [mean_value, max_value].
+    """
+    # Only support single input.
+    assert not isinstance(x, (list, tuple))
+    mean_value = paddle.mean(x, axis=1, keepdim=True)
+    max_value = paddle.max(x, axis=1, keepdim=True)
+
+    if not use_concat:
+        return [mean_value, max_value]
+    return paddle.concat([mean_value, max_value], axis=1)
+
+
+def avg_max_reduce_channel(x):
+    """
+    Reduce the channel dim (axis 1) of the input(s) by both mean and max.
+
+    Args:
+        x (Tensor|list|tuple): A single tensor, or a list/tuple of tensors.
+
+    Returns:
+        Tensor: For several inputs the mean/max pairs are interleaved per
+            input: cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...]).
+    """
+    if not isinstance(x, (list, tuple)):
+        return avg_max_reduce_channel_helper(x)
+    if len(x) == 1:
+        return avg_max_reduce_channel_helper(x[0])
+
+    # Each helper call yields [mean, max]; flatten them in input order.
+    res = [
+        reduced for xi in x
+        for reduced in avg_max_reduce_channel_helper(xi, False)
+    ]
+    return paddle.concat(res, axis=1)
+
+
+def cat_avg_max_reduce_channel(x):
+    """
+    Concatenate the inputs on axis 1, then reduce that axis by mean and max.
+
+    Args:
+        x (list|tuple): At least two tensors to be concatenated on axis 1.
+
+    Returns:
+        Tensor: cat([mean_value, max_value]) on axis 1, where both values are
+            computed over the concatenated tensor with the reduced dim kept.
+    """
+    assert isinstance(x, (list, tuple)) and len(x) > 1
+
+    merged = paddle.concat(x, axis=1)
+
+    mean_value = paddle.mean(merged, axis=1, keepdim=True)
+    max_value = paddle.max(merged, axis=1, keepdim=True)
+    return paddle.concat([mean_value, max_value], axis=1)

+ 2 - 2
paddlers/models/ppseg/models/losses/binary_cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -99,7 +99,7 @@ class BCELoss(nn.Layer):
                     raise ValueError(
                     raise ValueError(
                         "if type of `weight` is str, it should equal to 'dynamic', but it is {}"
                         "if type of `weight` is str, it should equal to 'dynamic', but it is {}"
                         .format(self.weight))
                         .format(self.weight))
-            elif isinstance(self.weight, paddle.VarBase):
+            elif not isinstance(self.weight, paddle.Tensor):
                 raise TypeError(
                 raise TypeError(
                     'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
                     'The type of `weight` is wrong, it should be Tensor or str, but it is {}'
                     .format(type(self.weight)))
                     .format(type(self.weight)))

+ 1 - 1
paddlers/models/ppseg/models/losses/bootstrapped_cross_entropy.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 2 - 4
paddlers/models/ppseg/models/losses/cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -78,8 +78,6 @@ class CrossEntropyLoss(nn.Layer):
             logit = paddle.transpose(logit, [0, 2, 3, 1])
             logit = paddle.transpose(logit, [0, 2, 3, 1])
         label = label.astype('int64')
         label = label.astype('int64')
 
 
-        # In F.cross_entropy, the ignore_index is invalid, which needs to be fixed.
-        # When there is 255 in the label and paddle version <= 2.1.3, the cross_entropy OP will report an error, which is fixed in paddle develop version.
         loss = F.cross_entropy(
         loss = F.cross_entropy(
             logit,
             logit,
             label,
             label,
@@ -121,7 +119,7 @@ class CrossEntropyLoss(nn.Layer):
             loss = loss * semantic_weights
             loss = loss * semantic_weights
 
 
         if self.weight is not None:
         if self.weight is not None:
-            _one_hot = F.one_hot(label, logit.shape[-1])
+            _one_hot = F.one_hot(label * mask, logit.shape[-1])
             coef = paddle.sum(_one_hot * self.weight, axis=-1)
             coef = paddle.sum(_one_hot * self.weight, axis=-1)
         else:
         else:
             coef = paddle.ones_like(label)
             coef = paddle.ones_like(label)

+ 2 - 2
paddlers/models/ppseg/models/losses/decoupledsegnet_relax_boundary_loss.py

@@ -16,9 +16,9 @@ import numpy as np
 import paddle
 import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
-from scipy.ndimage.interpolation import shift
+from scipy.ndimage import shift
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 2 - 2
paddlers/models/ppseg/models/losses/detail_aggregate_loss.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 44 - 23
paddlers/models/ppseg/models/losses/dice_loss.py

@@ -13,44 +13,65 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
 class DiceLoss(nn.Layer):
 class DiceLoss(nn.Layer):
     """
     """
-    Implements the dice loss function.
+    The implementation of the dice loss.
 
 
     Args:
     Args:
-        ignore_index (int64): Specifies a target value that is ignored
-            and does not contribute to the input gradient. Default ``255``.
-        smooth (float32): laplace smoothing,
-            to smooth dice loss and accelerate convergence. following:
-            https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
+        weight (list[float], optional): The weight for each class. Default: None.
+        ignore_index (int64, optional): Specifies a target value that
+            is ignored and does not contribute to the input gradient. Default ``255``.
+        smooth (float32): Laplace smoothing to smooth dice loss and accelerate convergence.
+            Default: 1.0
     """
     """
 
 
-    def __init__(self, ignore_index=255, smooth=0.):
-        super(DiceLoss, self).__init__()
+    def __init__(self, weight=None, ignore_index=255, smooth=1.0):
+        super().__init__()
+        self.weight = weight
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
-        self.eps = 1e-5
         self.smooth = smooth
         self.smooth = smooth
+        self.eps = 1e-8
 
 
     def forward(self, logits, labels):
     def forward(self, logits, labels):
-        labels = paddle.cast(labels, dtype='int32')
-        labels_one_hot = F.one_hot(labels, num_classes=logits.shape[1])
-        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
-        labels_one_hot = paddle.cast(labels_one_hot, dtype='float32')
+        num_class = logits.shape[1]
+        if self.weight is not None:
+            assert num_class == len(self.weight), \
+                "The lenght of weight should be euqal to the num class"
+
+        mask = labels != self.ignore_index
+        mask = paddle.cast(paddle.unsqueeze(mask, 1), 'float32')
 
 
+        labels[labels == self.ignore_index] = 0
+        labels_one_hot = F.one_hot(labels, num_class)
+        labels_one_hot = paddle.transpose(labels_one_hot, [0, 3, 1, 2])
         logits = F.softmax(logits, axis=1)
         logits = F.softmax(logits, axis=1)
 
 
-        mask = (paddle.unsqueeze(labels, 1) != self.ignore_index)
-        logits = logits * mask
-        labels_one_hot = labels_one_hot * mask
+        dice_loss = 0.0
+        for i in range(num_class):
+            dice_loss_i = dice_loss_helper(logits[:, i], labels_one_hot[:, i],
+                                           mask, self.smooth, self.eps)
+            if self.weight is not None:
+                dice_loss_i *= self.weight[i]
+            dice_loss += dice_loss_i
+        dice_loss = dice_loss / num_class
+
+        return dice_loss
 
 
-        dims = (0, ) + tuple(range(2, labels.ndimension() + 1))
 
 
-        intersection = paddle.sum(logits * labels_one_hot, dims)
-        cardinality = paddle.sum(logits + labels_one_hot, dims)
-        dice_loss = ((2. * intersection + self.smooth) /
-                     (cardinality + self.eps + self.smooth)).mean()
-        return 1 - dice_loss
+def dice_loss_helper(logit, label, mask, smooth, eps):
+    """
+    Compute the masked dice loss between a prediction and a target.
+
+    The three tensors are reshaped with shape [0, -1] (first dim kept, the
+    rest flattened), the mask is applied to both prediction and target, and
+    the per-row dice loss is averaged.
+
+    Args:
+        logit (Tensor): The prediction; must have the same shape as label.
+        label (Tensor): The target.
+        mask (Tensor): Multiplied into both logit and label before summing.
+        smooth (float): Added to numerator and denominator.
+        eps (float): Added to the denominator only.
+
+    Returns:
+        Tensor: The mean of 1 - (2 * intersection + smooth) /
+            (cardinality + smooth + eps) over the first dim.
+    """
+    assert logit.shape == label.shape, \
+        "The shape of logit and label should be the same"
+    flat_logit = paddle.reshape(logit, [0, -1])
+    flat_label = paddle.reshape(label, [0, -1])
+    flat_mask = paddle.reshape(mask, [0, -1])
+    flat_logit *= flat_mask
+    flat_label *= flat_mask
+    overlap = paddle.sum(flat_logit * flat_label, axis=1)
+    total = paddle.sum(flat_logit + flat_label, axis=1)
+    per_row = 1 - (2 * overlap + smooth) / (total + smooth + eps)
+    return per_row.mean()

+ 2 - 2
paddlers/models/ppseg/models/losses/edge_attention_loss.py

@@ -16,8 +16,8 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import losses
+from paddleseg.cvlibs import manager
+from paddleseg.models import losses
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 98 - 26
paddlers/models/ppseg/models/losses/focal_loss.py

@@ -17,44 +17,116 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
 class FocalLoss(nn.Layer):
 class FocalLoss(nn.Layer):
     """
     """
-    Focal Loss.
+    The implementation of focal loss.
 
 
-    Code referenced from:
-    https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
+    The focal loss requires the label is 0 or 1 for now.
 
 
     Args:
     Args:
-        gamma (float): the coefficient of Focal Loss.
-        ignore_index (int64): Specifies a target value that is ignored
+        alpha (float, list, optional): The alpha of focal loss. alpha is the weight
+            of class 1, 1-alpha is the weight of class 0. Default: 0.25
+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
+        ignore_index (int64, optional): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default ``255``.
             and does not contribute to the input gradient. Default ``255``.
     """
     """
 
 
-    def __init__(self, gamma=2.0, ignore_index=255, edge_label=False):
-        super(FocalLoss, self).__init__()
+    def __init__(self, alpha=0.25, gamma=2.0, ignore_index=255):
+        super().__init__()
+        self.alpha = alpha
         self.gamma = gamma
         self.gamma = gamma
         self.ignore_index = ignore_index
         self.ignore_index = ignore_index
-        self.edge_label = edge_label
+        self.EPS = 1e-10
 
 
     def forward(self, logit, label):
     def forward(self, logit, label):
-        logit = paddle.reshape(
-            logit, [logit.shape[0], logit.shape[1], -1])  # N,C,H,W => N,C,H*W
-        logit = paddle.transpose(logit, [0, 2, 1])  # N,C,H*W => N,H*W,C
-        logit = paddle.reshape(logit,
-                               [-1, logit.shape[2]])  # N,H*W,C => N*H*W,C
-        label = paddle.reshape(label, [-1, 1])
-        range_ = paddle.arange(0, label.shape[0])
-        range_ = paddle.unsqueeze(range_, axis=-1)
-        label = paddle.cast(label, dtype='int64')
-        label = paddle.concat([range_, label], axis=-1)
-        logpt = F.log_softmax(logit)
-        logpt = paddle.gather_nd(logpt, label)
-
-        pt = paddle.exp(logpt.detach())
-        loss = -1 * (1 - pt)**self.gamma * logpt
-        loss = paddle.mean(loss)
-        return loss
+        """
+        Forward computation.
+
+        Args:
+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
+                (N, C, H, W), where C is number of classes.
+            label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
+                where each value is 0 <= label[i] <= C-1.
+        Returns:
+            (Tensor): The average loss.
+        """
+        assert logit.ndim == 4, "The ndim of logit should be 4."
+        assert logit.shape[1] == 2, "The channel of logit should be 2."
+        assert label.ndim == 3, "The ndim of label should be 3."
+
+        class_num = logit.shape[1]  # class num is 2
+        logit = paddle.transpose(logit, [0, 2, 3, 1])  # N,C,H,W => N,H,W,C
+
+        mask = label != self.ignore_index  # N,H,W
+        mask = paddle.unsqueeze(mask, 3)
+        mask = paddle.cast(mask, 'float32')
+        mask.stop_gradient = True
+
+        label = F.one_hot(label, class_num)  # N,H,W,C
+        label = paddle.cast(label, logit.dtype)
+        label.stop_gradient = True
+
+        loss = F.sigmoid_focal_loss(
+            logit=logit,
+            label=label,
+            alpha=self.alpha,
+            gamma=self.gamma,
+            reduction='none')
+        loss = loss * mask
+        avg_loss = paddle.sum(loss) / (
+            paddle.sum(paddle.cast(mask != 0., 'int32')) * class_num + self.EPS)
+        return avg_loss
+
+
+@manager.LOSSES.add_component
+class MultiClassFocalLoss(nn.Layer):
+    """
+    The implementation of focal loss for multi class.
+
+    Args:
+        num_class (int): The number of classes. It is stored on the layer but
+            not read by the forward computation.
+        alpha (float, optional): A factor multiplied into the focal term of
+            every pixel. Default: 1.0
+        gamma (float, optional): The gamma of Focal Loss. Default: 2.0
+        ignore_index (int64, optional): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default ``255``.
+    """
+
+    def __init__(self, num_class, alpha=1.0, gamma=2.0, ignore_index=255):
+        super().__init__()
+        self.num_class = num_class
+        self.alpha = alpha
+        self.gamma = gamma
+        self.ignore_index = ignore_index
+        # Keeps the final division defined when every label is ignored.
+        self.EPS = 1e-10
+
+    def forward(self, logit, label):
+        """
+        Forward computation.
+
+        Args:
+            logit (Tensor): Logit tensor, the data type is float32, float64. Shape is
+                (N, C, H, W), where C is number of classes.
+            label (Tensor): Label tensor, the data type is int64. Shape is (N, H, W),
+                where each value is 0 <= label[i] <= C-1.
+        Returns:
+            (Tensor): The average loss.
+        """
+        assert logit.ndim == 4, "The ndim of logit should be 4."
+        assert label.ndim == 3, "The ndim of label should be 3."
+
+        logit = paddle.transpose(logit, [0, 2, 3, 1])  # N,C,H,W => N,H,W,C
+        label = label.astype('int64')
+        ce_loss = F.cross_entropy(
+            logit, label, ignore_index=self.ignore_index, reduction='none')
+
+        # exp(-ce) is the probability assigned to the labelled class.
+        pt = paddle.exp(-ce_loss)
+        focal_loss = self.alpha * ((1 - pt)**self.gamma) * ce_loss
+
+        mask = paddle.cast(label != self.ignore_index, 'float32')
+        focal_loss *= mask
+        # mean(loss) / mean(mask) averages over the non-ignored labels only.
+        avg_loss = paddle.mean(focal_loss) / (paddle.mean(mask) + self.EPS)
+        return avg_loss

+ 1 - 1
paddlers/models/ppseg/models/losses/gscnn_dual_task_loss.py

@@ -13,7 +13,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 1 - 1
paddlers/models/ppseg/models/losses/kl_loss.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 23 - 1
paddlers/models/ppseg/models/losses/l1_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -74,3 +74,25 @@ class L1Loss(nn.L1Loss):
 
 
     def __init__(self, reduction='mean', ignore_index=255):
     def __init__(self, reduction='mean', ignore_index=255):
         super().__init__(reduction=reduction)
         super().__init__(reduction=reduction)
+        self.ignore_index = ignore_index
+        self.EPS = 1e-10
+
+    def forward(self, input, label):
+        """
+        Compute the L1 loss, zeroing positions whose label equals ignore_index.
+
+        Args:
+            input (Tensor): The prediction.
+            label (Tensor): The target; its stop_gradient flag is set to True.
+
+        Returns:
+            Tensor: The masked element-wise loss for reduction 'none', its sum
+                for 'sum', or for 'mean' the mean of the masked loss divided
+                by the mean of the mask (plus EPS).
+
+        Raises:
+            ValueError: If self.reduction is not 'sum', 'mean' or 'none'.
+        """
+        valid = paddle.cast(label != self.ignore_index, "float32")
+        label.stop_gradient = True
+        valid.stop_gradient = True
+
+        elementwise = paddle.nn.functional.l1_loss(
+            input, label, "none", name=self.name)
+        output = elementwise * valid
+
+        if self.reduction == "none":
+            return output
+        if self.reduction == "sum":
+            return paddle.sum(output)
+        if self.reduction == "mean":
+            # Dividing by mean(valid) rescales the mean to the kept positions.
+            return paddle.mean(output) / (paddle.mean(valid) + self.EPS)
+        raise ValueError(
+            "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
+            "received %s, which is not allowed." % self.reduction)

+ 13 - 5
paddlers/models/ppseg/models/losses/lovasz_loss.py

@@ -22,7 +22,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -124,8 +124,12 @@ def lovasz_hinge_flat(logits, labels):
     signs = 2. * labels - 1.
     signs = 2. * labels - 1.
     signs.stop_gradient = True
     signs.stop_gradient = True
     errors = 1. - logits * signs
     errors = 1. - logits * signs
-    errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
-                                                        'descending', True)
+    if hasattr(paddle, "_legacy_C_ops"):
+        errors_sorted, perm = paddle._legacy_C_ops.argsort(errors, 'axis', 0,
+                                                           'descending', True)
+    else:
+        errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
+                                                    'descending', True)
     errors_sorted.stop_gradient = False
     errors_sorted.stop_gradient = False
     gt_sorted = paddle.gather(labels, perm)
     gt_sorted = paddle.gather(labels, perm)
     grad = lovasz_grad(gt_sorted)
     grad = lovasz_grad(gt_sorted)
@@ -181,8 +185,12 @@ def lovasz_softmax_flat(probas, labels, classes='present'):
         else:
         else:
             class_pred = probas[:, c]
             class_pred = probas[:, c]
         errors = paddle.abs(fg - class_pred)
         errors = paddle.abs(fg - class_pred)
-        errors_sorted, perm = paddle.fluid.core.ops.argsort(errors, 'axis', 0,
-                                                            'descending', True)
+        if hasattr(paddle, "_legacy_C_ops"):
+            errors_sorted, perm = paddle._legacy_C_ops.argsort(
+                errors, 'axis', 0, 'descending', True)
+        else:
+            errors_sorted, perm = paddle._C_ops.argsort(errors, 'axis', 0,
+                                                        'descending', True)
         errors_sorted.stop_gradient = False
         errors_sorted.stop_gradient = False
 
 
         fg_sorted = paddle.gather(fg, perm)
         fg_sorted = paddle.gather(fg, perm)

+ 1 - 1
paddlers/models/ppseg/models/losses/mean_square_error_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 1 - 1
paddlers/models/ppseg/models/losses/mixed_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 2 - 2
paddlers/models/ppseg/models/losses/ohem_cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -55,7 +55,7 @@ class OhemCrossEntropyLoss(nn.Layer):
 
 
         # get the label after ohem
         # get the label after ohem
         n, c, h, w = logit.shape
         n, c, h, w = logit.shape
-        label = label.reshape((-1, ))
+        label = label.reshape((-1, )).astype('int64')
         valid_mask = (label != self.ignore_index).astype('int64')
         valid_mask = (label != self.ignore_index).astype('int64')
         num_valid = valid_mask.sum()
         num_valid = valid_mask.sum()
         label = label * valid_mask
         label = label * valid_mask

+ 2 - 2
paddlers/models/ppseg/models/losses/ohem_edge_attention_loss.py

@@ -16,8 +16,8 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.models import losses
+from paddleseg.cvlibs import manager
+from paddleseg.models import losses
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 5 - 2
paddlers/models/ppseg/models/losses/pixel_contrast_cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -101,9 +101,12 @@ class PixelContrastCrossEntropyLoss(nn.Layer):
                 elif num_hard >= n_view / 2:
                 elif num_hard >= n_view / 2:
                     num_easy_keep = num_easy
                     num_easy_keep = num_easy
                     num_hard_keep = n_view - num_easy_keep
                     num_hard_keep = n_view - num_easy_keep
-                else:
+                elif num_easy >= n_view / 2:
                     num_hard_keep = num_hard
                     num_hard_keep = num_hard
                     num_easy_keep = n_view - num_hard_keep
                     num_easy_keep = n_view - num_hard_keep
+                else:
+                    num_hard_keep = num_hard
+                    num_easy_keep = num_easy
 
 
                 indices = None
                 indices = None
                 if num_hard > 0:
                 if num_hard > 0:

+ 1 - 1
paddlers/models/ppseg/models/losses/point_cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 1 - 1
paddlers/models/ppseg/models/losses/rmi_loss.py

@@ -17,7 +17,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 _euler_num = 2.718281828
 _euler_num = 2.718281828
 _pi = 3.14159265
 _pi = 3.14159265

+ 8 - 6
paddlers/models/ppseg/models/losses/semantic_connectivity_loss.py

@@ -18,7 +18,7 @@ import paddle
 from paddle import nn
 from paddle import nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component
@@ -92,6 +92,7 @@ class SemanticConnectivityLoss(nn.Layer):
                 label_num_conn, label_conn = cv2.connectedComponents(
                 label_num_conn, label_conn = cv2.connectedComponents(
                     labels_np_class.astype(np.uint8))
                     labels_np_class.astype(np.uint8))
 
 
+                origin_pred_num_conn = pred_num_conn
                 if pred_num_conn > 2 * label_num_conn:
                 if pred_num_conn > 2 * label_num_conn:
                     pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
                     pred_num_conn = min(pred_num_conn, self.max_pred_num_conn)
                 real_pred_num = pred_num_conn - 1
                 real_pred_num = pred_num_conn - 1
@@ -100,8 +101,9 @@ class SemanticConnectivityLoss(nn.Layer):
                 # Connected Components Matching and SC Loss Calculation
                 # Connected Components Matching and SC Loss Calculation
                 if real_label_num > 0 and real_pred_num > 0:
                 if real_label_num > 0 and real_pred_num > 0:
                     img_connectivity = compute_class_connectiveity(
                     img_connectivity = compute_class_connectiveity(
-                        pred_conn, label_conn, pred_num_conn, label_num_conn,
-                        pred_i, real_label_num, real_pred_num, zero)
+                        pred_conn, label_conn, pred_num_conn,
+                        origin_pred_num_conn, label_num_conn, pred_i,
+                        real_label_num, real_pred_num, zero)
                     sc_loss += 1 - img_connectivity
                     sc_loss += 1 - img_connectivity
                 elif real_label_num == 0 and real_pred_num == 0:
                 elif real_label_num == 0 and real_pred_num == 0:
                     # if no connected component, SC Loss = 0, so pass
                     # if no connected component, SC Loss = 0, so pass
@@ -122,12 +124,12 @@ class SemanticConnectivityLoss(nn.Layer):
 
 
 
 
 def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
 def compute_class_connectiveity(pred_conn, label_conn, pred_num_conn,
-                                label_num_conn, pred, real_label_num,
-                                real_pred_num, zero):
+                                origin_pred_num_conn, label_num_conn, pred,
+                                real_label_num, real_pred_num, zero):
 
 
     pred_conn = paddle.to_tensor(pred_conn)
     pred_conn = paddle.to_tensor(pred_conn)
     label_conn = paddle.to_tensor(label_conn)
     label_conn = paddle.to_tensor(label_conn)
-    pred_conn = F.one_hot(pred_conn, pred_num_conn)
+    pred_conn = F.one_hot(pred_conn, origin_pred_num_conn)
     label_conn = F.one_hot(label_conn, label_num_conn)
     label_conn = F.one_hot(label_conn, label_num_conn)
 
 
     ious = paddle.zeros((real_label_num, real_pred_num))
     ious = paddle.zeros((real_label_num, real_pred_num))

+ 1 - 1
paddlers/models/ppseg/models/losses/semantic_encode_cross_entropy_loss.py

@@ -16,7 +16,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.cvlibs import manager
+from paddleseg.cvlibs import manager
 
 
 
 
 @manager.LOSSES.add_component
 @manager.LOSSES.add_component

+ 162 - 0
paddlers/models/ppseg/models/lraspp.py

@@ -0,0 +1,162 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import partial
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+
+
+@manager.MODELS.add_component
+class LRASPP(nn.Layer):
+    """
+    Semantic segmentation model with a light R-ASPP head.
+
+    The original article refers to
+        Howard, Andrew, et al. "Searching for mobilenetv3."
+        (https://arxiv.org/pdf/1905.02244.pdf)
+
+    Args:
+        num_classes (int): The number of target classes.
+        backbone (nn.Layer): Backbone network. It must expose a `feat_channels`
+            attribute holding the channel count of each feature map it returns.
+        backbone_indices (List(int), optional): The values indicate the indices of backbone output
+            used as the input of the LR-ASPP head. Must be non-empty, and every index must be
+            smaller than `len(backbone.feat_channels)`.
+            Default: [0, 1, 3].
+        lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
+            Its length must be exactly `len(backbone_indices) - 1`.
+            Default: [32, 64].
+        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
+            Default: 128
+        resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
+            Default: bilinear.
+        use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
+            a 49x49 kernel for average pooling.
+            Default: True.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+
+    Raises:
+        AssertionError: If any of the constraints above is violated.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=[0, 1, 3],
+                 lraspp_head_inter_chs=[32, 64],
+                 lraspp_head_out_ch=128,
+                 resize_mode='bilinear',
+                 use_gap=True,
+                 pretrained=None):
+        super().__init__()
+
+        # backbone
+        assert hasattr(backbone, 'feat_channels'), \
+            "The backbone should has feat_channels."
+        # Reject an empty index list first: max() below would otherwise raise
+        # a bare ValueError before the intended message could be shown.
+        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
+            "should not be lesser than 1"
+        assert len(backbone.feat_channels) >= len(backbone_indices), \
+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
+        assert len(backbone.feat_channels) > max(backbone_indices), \
+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
+        self.backbone = backbone
+
+        # head
+        # One intermediate channel count is needed for every selected feature
+        # except the last one, which feeds the head directly.
+        assert len(backbone_indices) == len(
+            lraspp_head_inter_chs
+        ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs."
+        self.backbone_indices = backbone_indices
+
+        self.lraspp_head = LRASPPHead(backbone_indices, backbone.feat_channels,
+                                      lraspp_head_inter_chs, lraspp_head_out_ch,
+                                      num_classes, resize_mode, use_gap)
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """
+        Run the backbone and the LR-ASPP head on `x`.
+
+        Returns:
+            list: A single-element list holding the logits, bilinearly resized
+                to the spatial size (dims 2 and onward) of `x`.
+        """
+        # Spatial size of the input, used to restore the logits' resolution.
+        x_hw = paddle.shape(x)[2:]
+
+        feats_backbone = self.backbone(x)
+        assert len(feats_backbone) >= len(self.backbone_indices), \
+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
+
+        # The head receives the full feature list and picks the levels itself.
+        y = self.lraspp_head(feats_backbone)
+        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
+        logit_list = [y]
+
+        return logit_list
+
+    def init_weight(self):
+        """Load the whole model from `self.pretrained` when it is set."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class LRASPPHead(nn.Layer):
+    """
+    Decoder head used by LRASPP.
+
+    The last backbone feature is projected by `conv_v` and multiplied by a
+    sigmoid gate (`conv_w`: average pooling -> 1x1 conv -> sigmoid, resized to
+    the feature's spatial size). The result is then merged step by step with
+    the remaining selected features, visited from the last selected index
+    towards the first, and finally mapped to `n_classes` channels.
+
+    Args:
+        indices (List(int)): Indices of the backbone features to use.
+        in_chs (List(int)): Channels of every backbone feature; indexed by `indices`.
+        mid_chs (List(int)): Channels of the 1x1 projection applied to each
+            feature except the last selected one.
+        out_ch (int): Channels kept between merge steps.
+        n_classes (int): Number of output channels of the final 1x1 conv.
+        resize_mode (str): Interpolation mode passed to F.interpolate.
+        use_gap (bool): Use AdaptiveAvgPool2D(1) in the gate branch if true,
+            otherwise AvgPool2D with a 49x49 kernel and stride (16, 20).
+        align_corners (bool, optional): Passed to F.interpolate. Default: False.
+    """
+
+    def __init__(self,
+                 indices,
+                 in_chs,
+                 mid_chs,
+                 out_ch,
+                 n_classes,
+                 resize_mode,
+                 use_gap,
+                 align_corners=False):
+        super().__init__()
+
+        # Everything below works from the last selected level to the first.
+        reversed_indices = indices[::-1]
+        self.indices = reversed_indices[1:]
+        self.in_chs = [in_chs[idx] for idx in reversed_indices]
+        self.mid_chs = mid_chs[::-1]
+
+        self.convs = nn.LayerList()
+        self.conv_ups = nn.LayerList()
+        for skip_ch, inter_ch in zip(self.in_chs[1:], self.mid_chs):
+            # 1x1 projection of the skip feature.
+            skip_proj = nn.Conv2D(
+                skip_ch, inter_ch, kernel_size=1, bias_attr=False)
+            self.convs.append(skip_proj)
+            # Fuses the concatenation of the running output and the projection.
+            merge = layers.ConvBNReLU(out_ch + inter_ch, out_ch, 1)
+            self.conv_ups.append(merge)
+
+        if use_gap:
+            pool = nn.AdaptiveAvgPool2D(1)
+        else:
+            pool = nn.AvgPool2D(kernel_size=(49, 49), stride=(16, 20))
+        gate_conv = nn.Conv2D(self.in_chs[0], out_ch, 1, bias_attr=False)
+        self.conv_w = nn.Sequential(pool, gate_conv, nn.Sigmoid())
+
+        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
+        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
+        self.conv_out = nn.Conv2D(
+            out_ch, n_classes, kernel_size=1, bias_attr=False)
+
+        self.interp = partial(
+            F.interpolate, mode=resize_mode, align_corners=align_corners)
+
+    def forward(self, in_feat_list):
+        """
+        Args:
+            in_feat_list (List(Tensor)): All feature maps returned by the backbone.
+
+        Returns:
+            Tensor: Logits with `n_classes` channels at the resolution of the
+                last feature merged (or of the last backbone feature when
+                there is nothing to merge).
+        """
+        # NOTE(review): the last list element is used here, whereas conv_v and
+        # conv_w were sized for the feature at indices[-1]; these agree only
+        # when indices[-1] addresses the last backbone output - confirm.
+        top = in_feat_list[-1]
+
+        value = self.conv_v(top)
+        gate = self.interp(self.conv_w(top), paddle.shape(top)[2:])
+        y = self.conv_t(value * gate)
+
+        for feat_idx, skip_proj, merge in zip(self.indices, self.convs,
+                                              self.conv_ups):
+            skip = in_feat_list[feat_idx]
+            # Bring the running output to the skip feature's spatial size.
+            y = self.interp(y, paddle.shape(skip)[2:])
+            y = merge(paddle.concat([y, skip_proj(skip)], axis=1))
+
+        return self.conv_out(y)

+ 4 - 4
paddlers/models/ppseg/models/mla_transformer.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
-from paddlers.models.ppseg.models import layers
-from paddlers.models.ppseg.cvlibs import manager
-from paddlers.models.ppseg.utils import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
 
 
 
 
 class MLAHeads(nn.Layer):
 class MLAHeads(nn.Layer):

+ 289 - 0
paddlers/models/ppseg/models/mobileseg.py

@@ -0,0 +1,289 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg import utils
+from paddleseg.models import layers
+from paddleseg.cvlibs import manager
+
+
+@manager.MODELS.add_component
+class MobileSeg(nn.Layer):
+    """
+    The semantic segmentation models for mobile devices.
+
+    Args:
+        num_classes (int): The number of target classes.
+        backbone (nn.Layer): Backbone network. It must expose a `feat_channels`
+            attribute holding the channel count of each feature map it returns.
+        backbone_indices (List(int), optional): The values indicate the indices of output of backbone.
+            Must be non-empty, and every index must be smaller than
+            `len(backbone.feat_channels)`. Default: [1, 2, 3].
+        cm_bin_sizes (List(int), optional): The bin size of context module. Default: [1, 2].
+        cm_out_ch (int, optional): The output channel of the last context module. Default: 64.
+        arm_type (str, optional): The type of attention refinement module; it must be the
+            name of an attribute of `layers`. Default: UAFMMobile.
+        arm_out_chs (List(int), optional): The out channels of each arm module. A single-element
+            list is repeated once per backbone index. Default: [32, 48, 64].
+        seg_head_inter_chs (List(int), optional): The intermediate channels of segmentation head.
+            A single-element list is repeated once per backbone index.
+            Default: [32, 32, 32].
+        resize_mode (str, optional): The resize mode for the upsampling operation in decoder.
+            Default: bilinear.
+        use_last_fuse (bool, optional): Whether use fusion in the last. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+
+    Raises:
+        AssertionError: If any of the constraints above is violated.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=[1, 2, 3],
+                 cm_bin_sizes=[1, 2],
+                 cm_out_ch=64,
+                 arm_type='UAFMMobile',
+                 arm_out_chs=[32, 48, 64],
+                 seg_head_inter_chs=[32, 32, 32],
+                 resize_mode='bilinear',
+                 use_last_fuse=False,
+                 pretrained=None):
+        super().__init__()
+
+        # backbone
+        assert hasattr(backbone, 'feat_channels'), \
+            "The backbone should has feat_channels."
+        # Reject an empty index list first: max() below would otherwise raise
+        # a bare ValueError before the intended message could be shown.
+        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
+            "should not be lesser than 1"
+        assert len(backbone.feat_channels) >= len(backbone_indices), \
+            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
+            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
+        assert len(backbone.feat_channels) > max(backbone_indices), \
+            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
+            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
+        self.backbone = backbone
+
+        self.backbone_indices = backbone_indices  # [..., x16_id, x32_id]
+        backbone_out_chs = [backbone.feat_channels[i] for i in backbone_indices]
+
+        # head
+        # List repetition builds a new list, so the default argument is never mutated.
+        if len(arm_out_chs) == 1:
+            arm_out_chs = arm_out_chs * len(backbone_indices)
+        assert len(arm_out_chs) == len(backbone_indices), "The length of " \
+            "arm_out_chs and backbone_indices should be equal"
+
+        self.ppseg_head = MobileSegHead(backbone_out_chs, arm_out_chs,
+                                        cm_bin_sizes, cm_out_ch, arm_type,
+                                        resize_mode, use_last_fuse)
+
+        if len(seg_head_inter_chs) == 1:
+            seg_head_inter_chs = seg_head_inter_chs * len(backbone_indices)
+        assert len(seg_head_inter_chs) == len(backbone_indices), "The length of " \
+            "seg_head_inter_chs and backbone_indices should be equal"
+        self.seg_heads = nn.LayerList()  # [..., head_16, head32]
+        for in_ch, mid_ch in zip(arm_out_chs, seg_head_inter_chs):
+            self.seg_heads.append(SegHead(in_ch, mid_ch, num_classes))
+
+        # pretrained
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """
+        Run the backbone, the decoder head and the segmentation head(s) on `x`.
+
+        Returns:
+            list: In training mode, one logit tensor per selected backbone
+                feature; otherwise a single-element list with the logits of
+                the first head only. Every tensor is bilinearly resized to the
+                spatial size (dims 2 and onward) of `x`.
+        """
+        # Spatial size of the input, used to restore the logits' resolution.
+        x_hw = paddle.shape(x)[2:]
+
+        feats_backbone = self.backbone(x)  # [x4, x8, x16, x32]
+        assert len(feats_backbone) >= len(self.backbone_indices), \
+            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
+            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"
+
+        feats_selected = [feats_backbone[i] for i in self.backbone_indices]
+        feats_head = self.ppseg_head(feats_selected)  # [..., x8, x16, x32]
+
+        if self.training:
+            # Every level gets its own head so that each can be supervised.
+            logit_list = [
+                seg_head(feat)
+                for feat, seg_head in zip(feats_head, self.seg_heads)
+            ]
+            logit_list = [
+                F.interpolate(
+                    logit, x_hw, mode='bilinear', align_corners=False)
+                for logit in logit_list
+            ]
+        else:
+            # Only the first (highest-resolution) output is used for inference.
+            logit = self.seg_heads[0](feats_head[0])
+            logit = F.interpolate(
+                logit, x_hw, mode='bilinear', align_corners=False)
+            logit_list = [logit]
+
+        return logit_list
+
+    def init_weight(self):
+        """Load the whole model from `self.pretrained` when it is set."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class MobileSegHead(nn.Layer):
+    """
+    The head of MobileSeg.
+
+    Args:
+        backbone_out_chs (List(int)): The channels of output tensors in the backbone.
+        arm_out_chs (List(int)): The out channels of each arm module.
+        cm_bin_sizes (List(int)): The bin size of context module.
+        cm_out_ch (int): The output channel of the last context module.
+        arm_type (str): The type of attention refinement module; it must be the
+            name of an attribute of `layers`.
+        resize_mode (str): The resize mode for the upsampling operation in decoder.
+        use_last_fuse (bool): If true, all outputs are additionally fused into
+            the first one (see `forward`).
+
+    Raises:
+        AssertionError: If `layers` has no attribute named `arm_type`.
+    """
+
+    def __init__(self, backbone_out_chs, arm_out_chs, cm_bin_sizes, cm_out_ch,
+                 arm_type, resize_mode, use_last_fuse):
+        super().__init__()
+
+        self.cm = MobileContextModule(backbone_out_chs[-1], cm_out_ch,
+                                      cm_out_ch, cm_bin_sizes)
+
+        assert hasattr(layers,arm_type), \
+            "Not support arm_type ({})".format(arm_type)
+        # Plain attribute lookup: same result as evaluating "layers.<arm_type>"
+        # without executing a string built from a config value.
+        arm_class = getattr(layers, arm_type)
+
+        self.arm_list = nn.LayerList()  # [..., arm8, arm16, arm32]
+        last = len(backbone_out_chs) - 1
+        for i, low_chs in enumerate(backbone_out_chs):
+            # The deepest arm is fed by the context module; every other arm
+            # is fed by the output of the next deeper arm.
+            high_ch = cm_out_ch if i == last else arm_out_chs[i + 1]
+            out_ch = arm_out_chs[i]
+            arm = arm_class(
+                low_chs, high_ch, out_ch, ksize=3, resize_mode=resize_mode)
+            self.arm_list.append(arm)
+
+        self.use_last_fuse = use_last_fuse
+        if self.use_last_fuse:
+            # One conv per deeper level maps it to the first level's channels.
+            self.fuse_convs = nn.LayerList()
+            for deeper_ch in arm_out_chs[1:]:
+                conv = layers.SeparableConvBNReLU(
+                    deeper_ch,
+                    arm_out_chs[0],
+                    kernel_size=3,
+                    bias_attr=False)
+                self.fuse_convs.append(conv)
+            # Reduces the concatenation of all levels back to one level's width.
+            self.last_conv = layers.SeparableConvBNReLU(
+                len(arm_out_chs) * arm_out_chs[0],
+                arm_out_chs[0],
+                kernel_size=3,
+                bias_attr=False)
+
+    def forward(self, in_feat_list):
+        """
+        Args:
+            in_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+        Returns:
+            out_feat_list (List(Tensor)): Such as [x2, x4, x8, x16, x32].
+                x2, x4 and x8 are optional.
+                The length of in_feat_list and out_feat_list are the same.
+                When `use_last_fuse` is true, the first element is replaced by
+                the fusion of all elements.
+        """
+
+        high_feat = self.cm(in_feat_list[-1])
+        out_feat_list = []
+
+        # Walk from the deepest level to the shallowest; each arm's output is
+        # the high-level input of the next shallower arm.
+        for i in reversed(range(len(in_feat_list))):
+            low_feat = in_feat_list[i]
+            arm = self.arm_list[i]
+            high_feat = arm(low_feat, high_feat)
+            out_feat_list.insert(0, high_feat)
+
+        if self.use_last_fuse:
+            x_list = [out_feat_list[0]]
+            size = paddle.shape(out_feat_list[0])[2:]
+            for feat, conv in zip(out_feat_list[1:], self.fuse_convs):
+                feat = conv(feat)
+                # Resize to the first level's spatial size before concatenation.
+                feat = F.interpolate(
+                    feat, size=size, mode='bilinear', align_corners=False)
+                x_list.append(feat)
+            fused = paddle.concat(x_list, axis=1)
+            out_feat_list[0] = self.last_conv(fused)
+
+        return out_feat_list
+
+
+class MobileContextModule(nn.Layer):
+    """
+    Context Module for Mobile Model.
+
+    Each stage pools the input to a fixed output size and applies a 1x1
+    ConvBNReLU; the stage outputs are resized back to the input's spatial
+    size, summed, and passed through a 3x3 separable conv.
+
+    Args:
+        in_channels (int): The number of input channels to pyramid pooling module.
+        inter_channels (int): The number of inter channels to pyramid pooling module.
+        out_channels (int): The number of output channels after pyramid pooling module.
+        bin_sizes (tuple): The out size of pooled feature maps, one stage per entry.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False
+            when the output size of feature is even, e.g. 1024x512, otherwise it is True,
+            e.g. 769x769. Default: False.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 bin_sizes,
+                 align_corners=False):
+        super().__init__()
+
+        self.stages = nn.LayerList()
+        for bin_size in bin_sizes:
+            self.stages.append(
+                self._make_stage(in_channels, inter_channels, bin_size))
+
+        self.conv_out = layers.SeparableConvBNReLU(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            bias_attr=False)
+
+        self.align_corners = align_corners
+
+    def _make_stage(self, in_channels, out_channels, size):
+        """Build one stage: adaptive average pooling to `size`, then a 1x1 ConvBNReLU."""
+        return nn.Sequential(
+            nn.AdaptiveAvgPool2D(output_size=size),
+            layers.ConvBNReLU(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1))
+
+    def forward(self, input):
+        """Sum the resized outputs of all stages and apply the output conv."""
+        target_hw = paddle.shape(input)[2:]
+        fused = None
+
+        for branch in self.stages:
+            pooled = F.interpolate(
+                branch(input),
+                target_hw,
+                mode='bilinear',
+                align_corners=self.align_corners)
+            if fused is not None:
+                # In-place accumulation, as in the reference implementation.
+                fused += pooled
+            else:
+                fused = pooled
+
+        return self.conv_out(fused)
+
+
+class SegHead(nn.Layer):
+    """
+    Segmentation head: a 3x3 separable ConvBNReLU followed by a 1x1 conv.
+
+    Args:
+        in_chan (int): Channels of the input feature map.
+        mid_chan (int): Channels produced by the separable conv.
+        n_classes (int): Channels of the output logits.
+    """
+
+    def __init__(self, in_chan, mid_chan, n_classes):
+        super().__init__()
+        self.conv = layers.SeparableConvBNReLU(
+            in_channels=in_chan,
+            out_channels=mid_chan,
+            kernel_size=3,
+            bias_attr=False)
+        self.conv_out = nn.Conv2D(
+            in_channels=mid_chan,
+            out_channels=n_classes,
+            kernel_size=1,
+            bias_attr=False)
+
+    def forward(self, x):
+        """Return the `n_classes`-channel logits for feature map `x`."""
+        return self.conv_out(self.conv(x))

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.