Explorar el Código

[Fix] Update ppdet Version and Update QR Code (#64)

Lin Manhui hace 3 años
padre
commit
a4957b21be
Se han modificado 100 ficheros con 6278 adiciones y 1307 borrados
  1. 4 4
      .github/workflows/build.yaml
  2. 1 1
      README.md
  3. BIN
      docs/images/whole_picture.png
  4. 1 0
      paddlers/models/hash.txt
  5. 11 3
      paddlers/models/ppdet/core/workspace.py
  6. 12 12
      paddlers/models/ppdet/data/__init__.py
  7. 1 1
      paddlers/models/ppdet/data/crop_utils/__init__.py
  8. 3 3
      paddlers/models/ppdet/data/reader.py
  9. 4 1
      paddlers/models/ppdet/data/shm_utils.py
  10. 1 0
      paddlers/models/ppdet/data/source/__init__.py
  11. 54 21
      paddlers/models/ppdet/data/source/category.py
  12. 159 55
      paddlers/models/ppdet/data/source/coco.py
  13. 126 24
      paddlers/models/ppdet/data/source/dataset.py
  14. 16 16
      paddlers/models/ppdet/data/source/keypoint_coco.py
  15. 19 11
      paddlers/models/ppdet/data/source/mot.py
  16. 7 4
      paddlers/models/ppdet/data/source/voc.py
  17. 2 0
      paddlers/models/ppdet/data/transform/__init__.py
  18. 2 2
      paddlers/models/ppdet/data/transform/autoaugment_utils.py
  19. 113 16
      paddlers/models/ppdet/data/transform/batch_operators.py
  20. 9 9
      paddlers/models/ppdet/data/transform/keypoint_operators.py
  21. 4 4
      paddlers/models/ppdet/data/transform/mot_operators.py
  22. 500 113
      paddlers/models/ppdet/data/transform/operators.py
  23. 479 0
      paddlers/models/ppdet/data/transform/rotated_operators.py
  24. 72 0
      paddlers/models/ppdet/data/utils.py
  25. 12 12
      paddlers/models/ppdet/engine/__init__.py
  26. 179 18
      paddlers/models/ppdet/engine/callbacks.py
  27. 12 12
      paddlers/models/ppdet/engine/env.py
  28. 44 20
      paddlers/models/ppdet/engine/export_utils.py
  29. 174 77
      paddlers/models/ppdet/engine/tracker.py
  30. 533 128
      paddlers/models/ppdet/engine/trainer.py
  31. 35 0
      paddlers/models/ppdet/ext_op/README.md
  32. 90 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
  33. 63 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
  34. 97 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
  35. 114 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
  36. 348 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
  37. 33 0
      paddlers/models/ppdet/ext_op/setup.py
  38. 149 0
      paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py
  39. 151 0
      paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py
  40. 1 1
      paddlers/models/ppdet/metrics/__init__.py
  41. 12 12
      paddlers/models/ppdet/metrics/coco_utils.py
  42. 11 1
      paddlers/models/ppdet/metrics/json_results.py
  43. 28 19
      paddlers/models/ppdet/metrics/keypoint_metrics.py
  44. 14 22
      paddlers/models/ppdet/metrics/map_utils.py
  45. 35 29
      paddlers/models/ppdet/metrics/mcmot_metrics.py
  46. 141 70
      paddlers/models/ppdet/metrics/metrics.py
  47. 43 29
      paddlers/models/ppdet/metrics/mot_metrics.py
  48. 12 12
      paddlers/models/ppdet/metrics/munkres.py
  49. 1 0
      paddlers/models/ppdet/model_zoo/.gitignore
  50. 12 12
      paddlers/models/ppdet/model_zoo/__init__.py
  51. 12 12
      paddlers/models/ppdet/model_zoo/model_zoo.py
  52. 13 0
      paddlers/models/ppdet/model_zoo/tests/__init__.py
  53. 48 0
      paddlers/models/ppdet/model_zoo/tests/test_get_model.py
  54. 68 0
      paddlers/models/ppdet/model_zoo/tests/test_list_model.py
  55. 14 12
      paddlers/models/ppdet/modeling/__init__.py
  56. 20 7
      paddlers/models/ppdet/modeling/architectures/__init__.py
  57. 79 0
      paddlers/models/ppdet/modeling/architectures/bytetrack.py
  58. 16 16
      paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
  59. 12 12
      paddlers/models/ppdet/modeling/architectures/centernet.py
  60. 15 14
      paddlers/models/ppdet/modeling/architectures/deepsort.py
  61. 12 12
      paddlers/models/ppdet/modeling/architectures/fairmot.py
  62. 14 14
      paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
  63. 12 12
      paddlers/models/ppdet/modeling/architectures/fcos.py
  64. 12 12
      paddlers/models/ppdet/modeling/architectures/gfl.py
  65. 12 12
      paddlers/models/ppdet/modeling/architectures/jde.py
  66. 13 13
      paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
  67. 4 4
      paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
  68. 16 16
      paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
  69. 19 27
      paddlers/models/ppdet/modeling/architectures/meta_arch.py
  70. 24 20
      paddlers/models/ppdet/modeling/architectures/picodet.py
  71. 68 0
      paddlers/models/ppdet/modeling/architectures/retinanet.py
  72. 25 44
      paddlers/models/ppdet/modeling/architectures/s2anet.py
  73. 12 12
      paddlers/models/ppdet/modeling/architectures/ttfnet.py
  74. 16 13
      paddlers/models/ppdet/modeling/architectures/yolo.py
  75. 138 0
      paddlers/models/ppdet/modeling/architectures/yolox.py
  76. 2 0
      paddlers/models/ppdet/modeling/assigners/__init__.py
  77. 33 27
      paddlers/models/ppdet/modeling/assigners/atss_assigner.py
  78. 54 0
      paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py
  79. 4 1
      paddlers/models/ppdet/modeling/assigners/simota_assigner.py
  80. 20 18
      paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
  81. 51 20
      paddlers/models/ppdet/modeling/assigners/utils.py
  82. 23 12
      paddlers/models/ppdet/modeling/backbones/__init__.py
  83. 1 1
      paddlers/models/ppdet/modeling/backbones/blazenet.py
  84. 245 0
      paddlers/models/ppdet/modeling/backbones/convnext.py
  85. 404 0
      paddlers/models/ppdet/modeling/backbones/csp_darknet.py
  86. 321 0
      paddlers/models/ppdet/modeling/backbones/cspresnet.py
  87. 15 10
      paddlers/models/ppdet/modeling/backbones/darknet.py
  88. 12 12
      paddlers/models/ppdet/modeling/backbones/dla.py
  89. 2 2
      paddlers/models/ppdet/modeling/backbones/esnet.py
  90. 12 12
      paddlers/models/ppdet/modeling/backbones/ghostnet.py
  91. 4 2
      paddlers/models/ppdet/modeling/backbones/hardnet.py
  92. 27 14
      paddlers/models/ppdet/modeling/backbones/lcnet.py
  93. 1 1
      paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
  94. 14 14
      paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
  95. 266 0
      paddlers/models/ppdet/modeling/backbones/mobileone.py
  96. 14 14
      paddlers/models/ppdet/modeling/backbones/resnet.py
  97. 17 15
      paddlers/models/ppdet/modeling/backbones/senet.py
  98. 2 3
      paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
  99. 22 71
      paddlers/models/ppdet/modeling/backbones/swin_transformer.py
  100. 74 0
      paddlers/models/ppdet/modeling/backbones/transformer_utils.py

+ 4 - 4
.github/workflows/build.yaml

@@ -26,16 +26,16 @@ jobs:
         include:
         include:
           - python-version: "3.7"
           - python-version: "3.7"
             os: windows-latest
             os: windows-latest
-            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
           - python-version: "3.7"
           - python-version: "3.7"
             os: ubuntu-latest
             os: ubuntu-latest
-            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
           - python-version: "3.8"
           - python-version: "3.8"
             os: windows-latest
             os: windows-latest
-            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl
+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp38-cp38-win_amd64.whl
           - python-version: "3.8"
           - python-version: "3.8"
             os: ubuntu-latest
             os: ubuntu-latest
-            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
       fail-fast: false
       fail-fast: false
     steps:
     steps:
       - uses: actions/checkout@v3
       - uses: actions/checkout@v3

+ 1 - 1
README.md

@@ -48,7 +48,7 @@ PaddleRS具有以下五大特色:
 * 如果您发现任何PaddleRS存在的问题或是对PaddleRS有建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleRS/issues)向我们提出。
 * 如果您发现任何PaddleRS存在的问题或是对PaddleRS有建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleRS/issues)向我们提出。
 * 欢迎加入PaddleRS微信群:
 * 欢迎加入PaddleRS微信群:
 <div align="center">
 <div align="center">
-<img src="https://user-images.githubusercontent.com/21275753/199192024-79373ad7-917f-4a7c-9de2-010a4d0c0152.png"  width = "150" />  
+<img src="https://user-images.githubusercontent.com/21275753/200470530-a3321f5b-fa8e-4330-84fa-b76cb3df873a.png"  width = "150" />  
 </div>
 </div>
 
 
 ## <img src="./docs/images/model.png" width="30"/> 产品矩阵
 ## <img src="./docs/images/model.png" width="30"/> 产品矩阵

BIN
docs/images/whole_picture.png


+ 1 - 0
paddlers/models/hash.txt

@@ -1 +1,2 @@
+ppdet ba2aad26e6bc1e5c2dad76ca96692a0d63eccfac
 ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef
 ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef

+ 11 - 3
paddlers/models/ppdet/core/workspace.py

@@ -210,9 +210,17 @@ def create(cls_or_name, **kwargs):
     assert type(cls_or_name) in [type, str
     assert type(cls_or_name) in [type, str
                                  ], "should be a class or name of a class"
                                  ], "should be a class or name of a class"
     name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
     name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
-    assert name in global_config and \
-        isinstance(global_config[name], SchemaDict), \
-        "the module {} is not registered".format(name)
+    if name in global_config:
+        if isinstance(global_config[name], SchemaDict):
+            pass
+        elif hasattr(global_config[name], "__dict__"):
+            # support instance return directly
+            return global_config[name]
+        else:
+            raise ValueError("The module {} is not registered".format(name))
+    else:
+        raise ValueError("The module {} is not registered".format(name))
+
     config = global_config[name]
     config = global_config[name]
     cls = getattr(config.pymodule, name)
     cls = getattr(config.pymodule, name)
     cls_kwargs = {}
     cls_kwargs = {}

+ 12 - 12
paddlers/models/ppdet/data/__init__.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from . import source
 from . import source

+ 1 - 1
paddlers/models/ppdet/data/crop_utils/__init__.py

@@ -10,4 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
-# limitations under the License.
+# limitations under the License.

+ 3 - 3
paddlers/models/ppdet/data/reader.py

@@ -23,7 +23,7 @@ else:
 import numpy as np
 import numpy as np
 
 
 from paddle.io import DataLoader, DistributedBatchSampler
 from paddle.io import DataLoader, DistributedBatchSampler
-from paddle.fluid.dataloader.collate import default_collate_fn
+from .utils import default_collate_fn
 
 
 from paddlers.models.ppdet.core.workspace import register
 from paddlers.models.ppdet.core.workspace import register
 from . import transform
 from . import transform
@@ -118,7 +118,7 @@ class BaseDataLoader(object):
         collate_batch (bool): whether to collate batch in dataloader.
         collate_batch (bool): whether to collate batch in dataloader.
             If set to True, the samples will collate into batch according
             If set to True, the samples will collate into batch according
             to the batch size. Otherwise, the ground-truth will not collate,
             to the batch size. Otherwise, the ground-truth will not collate,
-            which is used when the number of ground-truch is different in
+            which is used when the number of ground-truch is different in 
             samples.
             samples.
         use_shared_memory (bool): whether to use shared memory to
         use_shared_memory (bool): whether to use shared memory to
                 accelerate data loading, enable this only if you
                 accelerate data loading, enable this only if you
@@ -144,7 +144,7 @@ class BaseDataLoader(object):
         self._sample_transforms = Compose(
         self._sample_transforms = Compose(
             sample_transforms, num_classes=num_classes)
             sample_transforms, num_classes=num_classes)
 
 
-        # batch transfrom
+        # batch transfrom 
         self._batch_transforms = BatchCompose(batch_transforms, num_classes,
         self._batch_transforms = BatchCompose(batch_transforms, num_classes,
                                               collate_batch)
                                               collate_batch)
         self.batch_size = batch_size
         self.batch_size = batch_size

+ 4 - 1
paddlers/models/ppdet/data/shm_utils.py

@@ -34,7 +34,10 @@ SHM_DEFAULT_MOUNT = '/dev/shm'
 
 
 
 
 def _parse_size_in_M(size_str):
 def _parse_size_in_M(size_str):
-    num, unit = size_str[:-1], size_str[-1]
+    if size_str[-1] == 'B':
+        num, unit = size_str[:-2], size_str[-2]
+    else:
+        num, unit = size_str[:-1], size_str[-1]
     assert unit in SIZE_UNIT, \
     assert unit in SIZE_UNIT, \
             "unknown shm size unit {}".format(unit)
             "unknown shm size unit {}".format(unit)
     return float(num) * \
     return float(num) * \

+ 1 - 0
paddlers/models/ppdet/data/source/__init__.py

@@ -27,3 +27,4 @@ from .category import *
 from .keypoint_coco import *
 from .keypoint_coco import *
 from .mot import *
 from .mot import *
 from .sniper_coco import SniperCOCODataSet
 from .sniper_coco import SniperCOCODataSet
+from .dataset import ImageFolder

+ 54 - 21
paddlers/models/ppdet/data/source/category.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -39,24 +39,49 @@ def get_categories(metric_type, anno_file=None, arch=None):
     if arch == 'keypoint_arch':
     if arch == 'keypoint_arch':
         return (None, {'id': 'keypoint'})
         return (None, {'id': 'keypoint'})
 
 
+    if anno_file == None or (not os.path.isfile(anno_file)):
+        logger.warning(
+            "anno_file '{}' is None or not set or not exist, "
+            "please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
+            "otherwise the default categories will be used by metric_type.".
+            format(anno_file))
+
     if metric_type.lower() == 'coco' or metric_type.lower(
     if metric_type.lower() == 'coco' or metric_type.lower(
     ) == 'rbox' or metric_type.lower() == 'snipercoco':
     ) == 'rbox' or metric_type.lower() == 'snipercoco':
         if anno_file and os.path.isfile(anno_file):
         if anno_file and os.path.isfile(anno_file):
-            # lazy import pycocotools here
-            from pycocotools.coco import COCO
-
-            coco = COCO(anno_file)
-            cats = coco.loadCats(coco.getCatIds())
-
-            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
-            catid2name = {cat['id']: cat['name'] for cat in cats}
+            if anno_file.endswith('json'):
+                # lazy import pycocotools here
+                from pycocotools.coco import COCO
+                coco = COCO(anno_file)
+                cats = coco.loadCats(coco.getCatIds())
+
+                clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
+                catid2name = {cat['id']: cat['name'] for cat in cats}
+
+            elif anno_file.endswith('txt'):
+                cats = []
+                with open(anno_file) as f:
+                    for line in f.readlines():
+                        cats.append(line.strip())
+                if cats[0] == 'background': cats = cats[1:]
+
+                clsid2catid = {i: i for i in range(len(cats))}
+                catid2name = {i: name for i, name in enumerate(cats)}
+
+            else:
+                raise ValueError("anno_file {} should be json or txt.".format(
+                    anno_file))
             return clsid2catid, catid2name
             return clsid2catid, catid2name
 
 
         # anno file not exist, load default categories of COCO17
         # anno file not exist, load default categories of COCO17
         else:
         else:
             if metric_type.lower() == 'rbox':
             if metric_type.lower() == 'rbox':
+                logger.warning(
+                    "metric_type: {}, load default categories of DOTA.".format(
+                        metric_type))
                 return _dota_category()
                 return _dota_category()
-
+            logger.warning("metric_type: {}, load default categories of COCO.".
+                           format(metric_type))
             return _coco17_category()
             return _coco17_category()
 
 
     elif metric_type.lower() == 'voc':
     elif metric_type.lower() == 'voc':
@@ -77,6 +102,8 @@ def get_categories(metric_type, anno_file=None, arch=None):
         # anno file not exist, load default categories of
         # anno file not exist, load default categories of
         # VOC all 20 categories
         # VOC all 20 categories
         else:
         else:
+            logger.warning("metric_type: {}, load default categories of VOC.".
+                           format(metric_type))
             return _vocall_category()
             return _vocall_category()
 
 
     elif metric_type.lower() == 'oid':
     elif metric_type.lower() == 'oid':
@@ -104,6 +131,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
             return clsid2catid, catid2name
             return clsid2catid, catid2name
         # anno file not exist, load default category 'pedestrian'.
         # anno file not exist, load default category 'pedestrian'.
         else:
         else:
+            logger.warning(
+                "metric_type: {}, load default categories of pedestrian MOT.".
+                format(metric_type))
             return _mot_category(category='pedestrian')
             return _mot_category(category='pedestrian')
 
 
     elif metric_type.lower() in ['kitti', 'bdd100kmot']:
     elif metric_type.lower() in ['kitti', 'bdd100kmot']:
@@ -122,6 +152,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
             return clsid2catid, catid2name
             return clsid2catid, catid2name
         # anno file not exist, load default categories of visdrone all 10 categories
         # anno file not exist, load default categories of visdrone all 10 categories
         else:
         else:
+            logger.warning(
+                "metric_type: {}, load default categories of VisDrone.".format(
+                    metric_type))
             return _visdrone_category()
             return _visdrone_category()
 
 
     else:
     else:

+ 159 - 55
paddlers/models/ppdet/data/source/coco.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import os
 import os
@@ -33,12 +33,13 @@ class COCODataSet(DetDataset):
         anno_path (str): coco annotation file path.
         anno_path (str): coco annotation file path.
         data_fields (list): key name of data dictionary, at least have 'image'.
         data_fields (list): key name of data dictionary, at least have 'image'.
         sample_num (int): number of samples to load, -1 means all.
         sample_num (int): number of samples to load, -1 means all.
-        load_crowd (bool): whether to load crowded ground-truth.
+        load_crowd (bool): whether to load crowded ground-truth. 
             False as default
             False as default
         allow_empty (bool): whether to load empty entry. False as default
         allow_empty (bool): whether to load empty entry. False as default
-        empty_ratio (float): the ratio of empty record number to total
-            record's, if empty_ratio is out of [0. ,1.), do not sample the
+        empty_ratio (float): the ratio of empty record number to total 
+            record's, if empty_ratio is out of [0. ,1.), do not sample the 
             records and use all the empty entries. 1. as default
             records and use all the empty entries. 1. as default
+        repeat (int): repeat times for dataset, use in benchmark.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
@@ -49,9 +50,15 @@ class COCODataSet(DetDataset):
                  sample_num=-1,
                  sample_num=-1,
                  load_crowd=False,
                  load_crowd=False,
                  allow_empty=False,
                  allow_empty=False,
-                 empty_ratio=1.):
-        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
-                                          data_fields, sample_num)
+                 empty_ratio=1.,
+                 repeat=1):
+        super(COCODataSet, self).__init__(
+            dataset_dir,
+            image_dir,
+            anno_path,
+            data_fields,
+            sample_num,
+            repeat=repeat)
         self.load_image_only = False
         self.load_image_only = False
         self.load_semantic = False
         self.load_semantic = False
         self.load_crowd = load_crowd
         self.load_crowd = load_crowd
@@ -138,25 +145,14 @@ class COCODataSet(DetDataset):
                         if not any(np.array(inst['bbox'])):
                         if not any(np.array(inst['bbox'])):
                             continue
                             continue
 
 
-                    # read rbox anno or not
-                    is_rbox_anno = True if len(inst['bbox']) == 5 else False
-                    if is_rbox_anno:
-                        xc, yc, box_w, box_h, angle = inst['bbox']
-                        x1 = xc - box_w / 2.0
-                        y1 = yc - box_h / 2.0
-                        x2 = x1 + box_w
-                        y2 = y1 + box_h
-                    else:
-                        x1, y1, box_w, box_h = inst['bbox']
-                        x2 = x1 + box_w
-                        y2 = y1 + box_h
+                    x1, y1, box_w, box_h = inst['bbox']
+                    x2 = x1 + box_w
+                    y2 = y1 + box_h
                     eps = 1e-5
                     eps = 1e-5
                     if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                     if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                         inst['clean_bbox'] = [
                         inst['clean_bbox'] = [
                             round(float(x), 3) for x in [x1, y1, x2, y2]
                             round(float(x), 3) for x in [x1, y1, x2, y2]
                         ]
                         ]
-                        if is_rbox_anno:
-                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
                         bboxes.append(inst)
                         bboxes.append(inst)
                     else:
                     else:
                         logger.warning(
                         logger.warning(
@@ -171,9 +167,6 @@ class COCODataSet(DetDataset):
                     is_empty = True
                     is_empty = True
 
 
                 gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                 gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
-                if is_rbox_anno:
-                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
-                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
                 gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                 gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                 is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                 is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                 gt_poly = [None] * num_bbox
                 gt_poly = [None] * num_bbox
@@ -183,13 +176,10 @@ class COCODataSet(DetDataset):
                     catid = box['category_id']
                     catid = box['category_id']
                     gt_class[i][0] = self.catid2clsid[catid]
                     gt_class[i][0] = self.catid2clsid[catid]
                     gt_bbox[i, :] = box['clean_bbox']
                     gt_bbox[i, :] = box['clean_bbox']
-                    # xc, yc, w, h, theta
-                    if is_rbox_anno:
-                        gt_rbox[i, :] = box['clean_rbox']
                     is_crowd[i][0] = box['iscrowd']
                     is_crowd[i][0] = box['iscrowd']
-                    # check RLE format
+                    # check RLE format 
                     if 'segmentation' in box and box['iscrowd'] == 1:
                     if 'segmentation' in box and box['iscrowd'] == 1:
-                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
                     elif 'segmentation' in box and box['segmentation']:
                     elif 'segmentation' in box and box['segmentation']:
                         if not np.array(box['segmentation']
                         if not np.array(box['segmentation']
                                         ).size > 0 and not self.allow_empty:
                                         ).size > 0 and not self.allow_empty:
@@ -206,21 +196,12 @@ class COCODataSet(DetDataset):
                         gt_poly) and not self.allow_empty:
                         gt_poly) and not self.allow_empty:
                     continue
                     continue
 
 
-                if is_rbox_anno:
-                    gt_rec = {
-                        'is_crowd': is_crowd,
-                        'gt_class': gt_class,
-                        'gt_bbox': gt_bbox,
-                        'gt_rbox': gt_rbox,
-                        'gt_poly': gt_poly,
-                    }
-                else:
-                    gt_rec = {
-                        'is_crowd': is_crowd,
-                        'gt_class': gt_class,
-                        'gt_bbox': gt_bbox,
-                        'gt_poly': gt_poly,
-                    }
+                gt_rec = {
+                    'is_crowd': is_crowd,
+                    'gt_class': gt_class,
+                    'gt_bbox': gt_bbox,
+                    'gt_poly': gt_poly,
+                }
 
 
                 for k, v in gt_rec.items():
                 for k, v in gt_rec.items():
                     if k in self.data_fields:
                     if k in self.data_fields:
@@ -247,3 +228,126 @@ class COCODataSet(DetDataset):
             empty_records = self._sample_empty(empty_records, len(records))
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
             records += empty_records
         self.roidbs = records
         self.roidbs = records
+
+
+@register
+@serializable
+class SlicedCOCODataSet(COCODataSet):
+    """Sliced COCODataSet"""
+
+    def __init__(
+            self,
+            dataset_dir=None,
+            image_dir=None,
+            anno_path=None,
+            data_fields=['image'],
+            sample_num=-1,
+            load_crowd=False,
+            allow_empty=False,
+            empty_ratio=1.,
+            repeat=1,
+            sliced_size=[640, 640],
+            overlap_ratio=[0.25, 0.25], ):
+        super(SlicedCOCODataSet, self).__init__(
+            dataset_dir=dataset_dir,
+            image_dir=image_dir,
+            anno_path=anno_path,
+            data_fields=data_fields,
+            sample_num=sample_num,
+            load_crowd=load_crowd,
+            allow_empty=allow_empty,
+            empty_ratio=empty_ratio,
+            repeat=repeat, )
+        self.sliced_size = sliced_size
+        self.overlap_ratio = overlap_ratio
+
+    def parse_dataset(self):
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        assert anno_path.endswith('.json'), \
+            'invalid coco annotation file: ' + anno_path
+        from pycocotools.coco import COCO
+        coco = COCO(anno_path)
+        img_ids = coco.getImgIds()
+        img_ids.sort()
+        cat_ids = coco.getCatIds()
+        records = []
+        empty_records = []
+        ct = 0
+        ct_sub = 0
+
+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+        self.cname2cid = dict({
+            coco.loadCats(catid)[0]['name']: clsid
+            for catid, clsid in self.catid2clsid.items()
+        })
+
+        if 'annotations' not in coco.dataset:
+            self.load_image_only = True
+            logger.warning('Annotation file: {} does not contains ground truth '
+                           'and load image information only.'.format(anno_path))
+        try:
+            import sahi
+            from sahi.slicing import slice_image
+        except Exception as e:
+            logger.error(
+                'sahi not found, plaese install sahi. '
+                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+            )
+            raise e
+
+        sub_img_ids = 0
+        for img_id in img_ids:
+            img_anno = coco.loadImgs([img_id])[0]
+            im_fname = img_anno['file_name']
+            im_w = float(img_anno['width'])
+            im_h = float(img_anno['height'])
+
+            im_path = os.path.join(image_dir,
+                                   im_fname) if image_dir else im_fname
+            is_empty = False
+            if not os.path.exists(im_path):
+                logger.warning('Illegal image file: {}, and it will be '
+                               'ignored'.format(im_path))
+                continue
+
+            if im_w < 0 or im_h < 0:
+                logger.warning('Illegal width: {} or height: {} in annotation, '
+                               'and im_id: {} will be ignored'.format(
+                                   im_w, im_h, img_id))
+                continue
+
+            slice_image_result = sahi.slicing.slice_image(
+                image=im_path,
+                slice_height=self.sliced_size[0],
+                slice_width=self.sliced_size[1],
+                overlap_height_ratio=self.overlap_ratio[0],
+                overlap_width_ratio=self.overlap_ratio[1])
+
+            sub_img_num = len(slice_image_result)
+            for _ind in range(sub_img_num):
+                im = slice_image_result.images[_ind]
+                coco_rec = {
+                    'image': im,
+                    'im_id': np.array([sub_img_ids + _ind]),
+                    'h': im.shape[0],
+                    'w': im.shape[1],
+                    'ori_im_id': np.array([img_id]),
+                    'st_pix': np.array(
+                        slice_image_result.starting_pixels[_ind],
+                        dtype=np.float32),
+                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
+                } if 'image' in self.data_fields else {}
+                records.append(coco_rec)
+            ct_sub += sub_img_num
+            ct += 1
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
+        logger.info('{} samples and slice to {} sub_samples in file {}'.format(
+            ct, ct_sub, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs = records

+ 126 - 24
paddlers/models/ppdet/data/source/dataset.py

@@ -1,20 +1,20 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import os
 import os
+import copy
 import numpy as np
 import numpy as np
-
 try:
 try:
     from collections.abc import Sequence
     from collections.abc import Sequence
 except Exception:
 except Exception:
@@ -22,7 +22,10 @@ except Exception:
 from paddle.io import Dataset
 from paddle.io import Dataset
 from paddlers.models.ppdet.core.workspace import register, serializable
 from paddlers.models.ppdet.core.workspace import register, serializable
 from paddlers.models.ppdet.utils.download import get_dataset_path
 from paddlers.models.ppdet.utils.download import get_dataset_path
-import copy
+from paddlers.models.ppdet.data import source
+
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
 
 
 
 
 @serializable
 @serializable
@@ -37,6 +40,7 @@ class DetDataset(Dataset):
         data_fields (list): key name of data dictionary, at least have 'image'.
         data_fields (list): key name of data dictionary, at least have 'image'.
         sample_num (int): number of samples to load, -1 means all.
         sample_num (int): number of samples to load, -1 means all.
         use_default_label (bool): whether to load default label list.
         use_default_label (bool): whether to load default label list.
+        repeat (int): number of times the dataset is repeated; used in benchmarks.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
@@ -46,6 +50,7 @@ class DetDataset(Dataset):
                  data_fields=['image'],
                  data_fields=['image'],
                  sample_num=-1,
                  sample_num=-1,
                  use_default_label=None,
                  use_default_label=None,
+                 repeat=1,
                  **kwargs):
                  **kwargs):
         super(DetDataset, self).__init__()
         super(DetDataset, self).__init__()
         self.dataset_dir = dataset_dir if dataset_dir is not None else ''
         self.dataset_dir = dataset_dir if dataset_dir is not None else ''
@@ -54,28 +59,32 @@ class DetDataset(Dataset):
         self.data_fields = data_fields
         self.data_fields = data_fields
         self.sample_num = sample_num
         self.sample_num = sample_num
         self.use_default_label = use_default_label
         self.use_default_label = use_default_label
+        self.repeat = repeat
         self._epoch = 0
         self._epoch = 0
         self._curr_iter = 0
         self._curr_iter = 0
 
 
     def __len__(self, ):
     def __len__(self, ):
-        return len(self.roidbs)
+        return len(self.roidbs) * self.repeat
+
+    def __call__(self, *args, **kwargs):
+        return self
 
 
     def __getitem__(self, idx):
     def __getitem__(self, idx):
+        n = len(self.roidbs)
+        if self.repeat > 1:
+            idx %= n
         # data batch
         # data batch
         roidb = copy.deepcopy(self.roidbs[idx])
         roidb = copy.deepcopy(self.roidbs[idx])
         if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
         if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
-            n = len(self.roidbs)
             idx = np.random.randint(n)
             idx = np.random.randint(n)
             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
         elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
         elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
-            n = len(self.roidbs)
             idx = np.random.randint(n)
             idx = np.random.randint(n)
             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
         elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
         elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
-            n = len(self.roidbs)
             roidb = [roidb, ] + [
             roidb = [roidb, ] + [
                 copy.deepcopy(self.roidbs[np.random.randint(n)])
                 copy.deepcopy(self.roidbs[np.random.randint(n)])
-                for _ in range(3)
+                for _ in range(4)
             ]
             ]
         if isinstance(roidb, Sequence):
         if isinstance(roidb, Sequence):
             for r in roidb:
             for r in roidb:
@@ -149,12 +158,15 @@ class ImageFolder(DetDataset):
         self.sample_num = sample_num
         self.sample_num = sample_num
 
 
     def check_or_download_dataset(self):
     def check_or_download_dataset(self):
+        return
+
+    def get_anno(self):
+        if self.anno_path is None:
+            return
         if self.dataset_dir:
         if self.dataset_dir:
-            # NOTE: ImageFolder is only used for prediction, in
-            #       infer mode, image_dir is set by set_images
-            #       so we only check anno_path here
-            self.dataset_dir = get_dataset_path(self.dataset_dir,
-                                                self.anno_path, None)
+            return os.path.join(self.dataset_dir, self.anno_path)
+        else:
+            return self.anno_path
 
 
     def parse_dataset(self, ):
     def parse_dataset(self, ):
         if not self.roidbs:
         if not self.roidbs:
@@ -195,3 +207,93 @@ class ImageFolder(DetDataset):
     def set_images(self, images):
     def set_images(self, images):
         self.image_dir = images
         self.image_dir = images
         self.roidbs = self._load_images()
         self.roidbs = self._load_images()
+
+    def set_slice_images(self,
+                         images,
+                         slice_size=[640, 640],
+                         overlap_ratio=[0.25, 0.25]):
+        self.image_dir = images
+        ori_records = self._load_images()
+        try:
+            import sahi
+            from sahi.slicing import slice_image
+        except Exception as e:
+            logger.error(
+                'sahi not found, plaese install sahi. '
+                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+            )
+            raise e
+
+        sub_img_ids = 0
+        ct = 0
+        ct_sub = 0
+        records = []
+        for i, ori_rec in enumerate(ori_records):
+            im_path = ori_rec['im_file']
+            slice_image_result = sahi.slicing.slice_image(
+                image=im_path,
+                slice_height=slice_size[0],
+                slice_width=slice_size[1],
+                overlap_height_ratio=overlap_ratio[0],
+                overlap_width_ratio=overlap_ratio[1])
+
+            sub_img_num = len(slice_image_result)
+            for _ind in range(sub_img_num):
+                im = slice_image_result.images[_ind]
+                rec = {
+                    'image': im,
+                    'im_id': np.array([sub_img_ids + _ind]),
+                    'h': im.shape[0],
+                    'w': im.shape[1],
+                    'ori_im_id': np.array([ori_rec['im_id'][0]]),
+                    'st_pix': np.array(
+                        slice_image_result.starting_pixels[_ind],
+                        dtype=np.float32),
+                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
+                } if 'image' in self.data_fields else {}
+                records.append(rec)
+            ct_sub += sub_img_num
+            ct += 1
+        print('{} samples and slice to {} sub_samples'.format(ct, ct_sub))
+        self.roidbs = records
+
+    def get_label_list(self):
+        # Only the VOC dataset needs a label list in ImageFolder
+        return self.anno_path
+
+
+@register
+class CommonDataset(object):
+    def __init__(self, **dataset_args):
+        super(CommonDataset, self).__init__()
+        dataset_args = copy.deepcopy(dataset_args)
+        type = dataset_args.pop("name")
+        self.dataset = getattr(source, type)(**dataset_args)
+
+    def __call__(self):
+        return self.dataset
+
+
+@register
+class TrainDataset(CommonDataset):
+    pass
+
+
+@register
+class EvalMOTDataset(CommonDataset):
+    pass
+
+
+@register
+class TestMOTDataset(CommonDataset):
+    pass
+
+
+@register
+class EvalDataset(CommonDataset):
+    pass
+
+
+@register
+class TestDataset(CommonDataset):
+    pass

+ 16 - 16
paddlers/models/ppdet/data/source/keypoint_coco.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 """
 """
 this code is base on https://github.com/open-mmlab/mmpose
 this code is base on https://github.com/open-mmlab/mmpose
@@ -27,7 +27,7 @@ from paddlers.models.ppdet.core.workspace import register, serializable
 
 
 @serializable
 @serializable
 class KeypointBottomUpBaseDataset(DetDataset):
 class KeypointBottomUpBaseDataset(DetDataset):
-    """Base class for bottom-up datasets.
+    """Base class for bottom-up datasets. 
 
 
     All datasets should subclass it.
     All datasets should subclass it.
     All subclasses should overwrite:
     All subclasses should overwrite:
@@ -91,7 +91,7 @@ class KeypointBottomUpBaseDataset(DetDataset):
 @register
 @register
 @serializable
 @serializable
 class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
 class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
-    """COCO dataset for bottom-up pose estimation.
+    """COCO dataset for bottom-up pose estimation. 
 
 
     The dataset loads raw features and apply specified transforms
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
     to return a dict containing the image tensors and other information.
@@ -262,7 +262,7 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
 @register
 @register
 @serializable
 @serializable
 class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
 class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
-    """CrowdPose dataset for bottom-up pose estimation.
+    """CrowdPose dataset for bottom-up pose estimation. 
 
 
     The dataset loads raw features and apply specified transforms
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
     to return a dict containing the image tensors and other information.
@@ -386,7 +386,7 @@ class KeypointTopDownBaseDataset(DetDataset):
 @register
 @register
 @serializable
 @serializable
 class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
 class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
-    """COCO dataset for top-down pose estimation.
+    """COCO dataset for top-down pose estimation. 
 
 
     The dataset loads raw features and apply specified transforms
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
     to return a dict containing the image tensors and other information.

+ 19 - 11
paddlers/models/ppdet/data/source/mot.py

@@ -39,15 +39,16 @@ class MOTDataSet(DetDataset):
         image_lists (str|list): mot data image lists, muiti-source mot dataset.
         image_lists (str|list): mot data image lists, muiti-source mot dataset.
         data_fields (list): key name of data dictionary, at least have 'image'.
         data_fields (list): key name of data dictionary, at least have 'image'.
         sample_num (int): number of samples to load, -1 means all.
         sample_num (int): number of samples to load, -1 means all.
+        repeat (int): number of times the dataset is repeated; used in benchmarks.
 
 
     Notes:
     Notes:
         MOT datasets root directory following this:
         MOT datasets root directory following this:
             dataset/mot
             dataset/mot
             |——————image_lists
             |——————image_lists
-            |        |——————caltech.train
-            |        |——————caltech.val
-            |        |——————mot16.train
-            |        |——————mot17.train
+            |        |——————caltech.train  
+            |        |——————caltech.val   
+            |        |——————mot16.train  
+            |        |——————mot17.train  
             |        ......
             |        ......
             |——————Caltech
             |——————Caltech
             |——————MOT17
             |——————MOT17
@@ -77,11 +78,13 @@ class MOTDataSet(DetDataset):
                  dataset_dir=None,
                  dataset_dir=None,
                  image_lists=[],
                  image_lists=[],
                  data_fields=['image'],
                  data_fields=['image'],
-                 sample_num=-1):
+                 sample_num=-1,
+                 repeat=1):
         super(MOTDataSet, self).__init__(
         super(MOTDataSet, self).__init__(
             dataset_dir=dataset_dir,
             dataset_dir=dataset_dir,
             data_fields=data_fields,
             data_fields=data_fields,
-            sample_num=sample_num)
+            sample_num=sample_num,
+            repeat=repeat)
         self.dataset_dir = dataset_dir
         self.dataset_dir = dataset_dir
         self.image_lists = image_lists
         self.image_lists = image_lists
         if isinstance(self.image_lists, str):
         if isinstance(self.image_lists, str):
@@ -243,8 +246,8 @@ class MCMOTDataSet(DetDataset):
         MCMOT datasets root directory following this:
         MCMOT datasets root directory following this:
             dataset/mot
             dataset/mot
             |——————image_lists
             |——————image_lists
-            |        |——————visdrone_mcmot.train
-            |        |——————visdrone_mcmot.val
+            |        |——————visdrone_mcmot.train  
+            |        |——————visdrone_mcmot.val   
             visdrone_mcmot
             visdrone_mcmot
             |——————images
             |——————images
             |        └——————train
             |        └——————train
@@ -348,10 +351,10 @@ class MCMOTDataSet(DetDataset):
         self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
         self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
         self.total_imgs = sum(self.num_imgs_each_data)
         self.total_imgs = sum(self.num_imgs_each_data)
 
 
-        # cname2cid and cid2cname
+        # cname2cid and cid2cname 
         cname2cid = {}
         cname2cid = {}
         if self.label_list is not None:
         if self.label_list is not None:
-            # if use label_list for multi source mix dataset,
+            # if use label_list for multi source mix dataset, 
             # please make sure label_list in the first sub_dataset at least.
             # please make sure label_list in the first sub_dataset at least.
             sub_dataset = self.image_lists[0].split('.')[0]
             sub_dataset = self.image_lists[0].split('.')[0]
             label_path = os.path.join(self.dataset_dir, sub_dataset,
             label_path = os.path.join(self.dataset_dir, sub_dataset,
@@ -461,7 +464,7 @@ class MOTImageFolder(DetDataset):
         video_file (str): path of the video file, default ''.
         video_file (str): path of the video file, default ''.
         frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
         frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
         dataset_dir (str): root directory for dataset.
         dataset_dir (str): root directory for dataset.
-        keep_ori_im (bool): whether to keep original image, default False.
+        keep_ori_im (bool): whether to keep original image, default False. 
             Set True when used during MOT model inference while saving
             Set True when used during MOT model inference while saving
             images or video, or used in DeepSORT.
             images or video, or used in DeepSORT.
     """
     """
@@ -474,6 +477,7 @@ class MOTImageFolder(DetDataset):
                  image_dir=None,
                  image_dir=None,
                  sample_num=-1,
                  sample_num=-1,
                  keep_ori_im=False,
                  keep_ori_im=False,
+                 anno_path=None,
                  **kwargs):
                  **kwargs):
         super(MOTImageFolder, self).__init__(
         super(MOTImageFolder, self).__init__(
             dataset_dir, image_dir, sample_num=sample_num)
             dataset_dir, image_dir, sample_num=sample_num)
@@ -483,6 +487,7 @@ class MOTImageFolder(DetDataset):
         self._imid2path = {}
         self._imid2path = {}
         self.roidbs = None
         self.roidbs = None
         self.frame_rate = frame_rate
         self.frame_rate = frame_rate
+        self.anno_path = anno_path
 
 
     def check_or_download_dataset(self):
     def check_or_download_dataset(self):
         return
         return
@@ -573,6 +578,9 @@ class MOTImageFolder(DetDataset):
                 "wrong or unsupported file format: {}".format(self.video_file)
                 "wrong or unsupported file format: {}".format(self.video_file)
         self.roidbs = self._load_video_images()
         self.roidbs = self._load_video_images()
 
 
+    def get_anno(self):
+        return self.anno_path
+
 
 
 def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
 def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
     return f.lower().endswith(extensions)
     return f.lower().endswith(extensions)

+ 7 - 4
paddlers/models/ppdet/data/source/voc.py

@@ -43,9 +43,10 @@ class VOCDataSet(DetDataset):
         label_list (str): if use_default_label is False, will load
         label_list (str): if use_default_label is False, will load
             mapping between category and class index.
             mapping between category and class index.
         allow_empty (bool): whether to load empty entry. False as default
         allow_empty (bool): whether to load empty entry. False as default
-        empty_ratio (float): the ratio of empty record number to total
-            record's, if empty_ratio is out of [0. ,1.), do not sample the
+        empty_ratio (float): the ratio of empty records to the total number
+            of records; if empty_ratio is outside [0., 1.), do not sample the
             records and use all the empty entries. 1. as default
             records and use all the empty entries. 1. as default
+        repeat (int): number of times the dataset is repeated; used in benchmarks.
     """
     """
 
 
     def __init__(self,
     def __init__(self,
@@ -56,13 +57,15 @@ class VOCDataSet(DetDataset):
                  sample_num=-1,
                  sample_num=-1,
                  label_list=None,
                  label_list=None,
                  allow_empty=False,
                  allow_empty=False,
-                 empty_ratio=1.):
+                 empty_ratio=1.,
+                 repeat=1):
         super(VOCDataSet, self).__init__(
         super(VOCDataSet, self).__init__(
             dataset_dir=dataset_dir,
             dataset_dir=dataset_dir,
             image_dir=image_dir,
             image_dir=image_dir,
             anno_path=anno_path,
             anno_path=anno_path,
             data_fields=data_fields,
             data_fields=data_fields,
-            sample_num=sample_num)
+            sample_num=sample_num,
+            repeat=repeat)
         self.label_list = label_list
         self.label_list = label_list
         self.allow_empty = allow_empty
         self.allow_empty = allow_empty
         self.empty_ratio = empty_ratio
         self.empty_ratio = empty_ratio

+ 2 - 0
paddlers/models/ppdet/data/transform/__init__.py

@@ -16,11 +16,13 @@ from . import operators
 from . import batch_operators
 from . import batch_operators
 from . import keypoint_operators
 from . import keypoint_operators
 from . import mot_operators
 from . import mot_operators
+from . import rotated_operators
 
 
 from .operators import *
 from .operators import *
 from .batch_operators import *
 from .batch_operators import *
 from .keypoint_operators import *
 from .keypoint_operators import *
 from .mot_operators import *
 from .mot_operators import *
+from .rotated_operators import *
 
 
 __all__ = []
 __all__ = []
 __all__ += registered_ops
 __all__ += registered_ops

+ 2 - 2
paddlers/models/ppdet/data/transform/autoaugment_utils.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
-# Reference:
+# Reference: 
 #   https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
 #   https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
 """AutoAugment util file."""
 """AutoAugment util file."""
 
 
@@ -65,7 +65,7 @@ def policy_v1():
         [('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
         [('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
         [('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
         [('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
         [('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
         [('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
-        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # ,
+        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # , 
         [('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
         [('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
         [('Color', 1.0, 6), ('Equalize', 1.0, 2)],
         [('Color', 1.0, 6), ('Equalize', 1.0, 2)],
         [('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],
         [('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],

+ 113 - 16
paddlers/models/ppdet/data/transform/batch_operators.py

@@ -47,6 +47,8 @@ __all__ = [
     'PadMaskBatch',
     'PadMaskBatch',
     'Gt2GFLTarget',
     'Gt2GFLTarget',
     'Gt2CenterNetTarget',
     'Gt2CenterNetTarget',
+    'PadGT',
+    'PadRGT',
 ]
 ]
 
 
 
 
@@ -108,12 +110,6 @@ class PadBatch(BaseOperator):
                 padding_segm[:, :im_h, :im_w] = gt_segm
                 padding_segm[:, :im_h, :im_w] = gt_segm
                 data['gt_segm'] = padding_segm
                 data['gt_segm'] = padding_segm
 
 
-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
-                # ploy to rbox
-                polys = data['gt_rbox2poly']
-                rbox = bbox_utils.poly2rbox(polys)
-                data['gt_rbox'] = rbox
-
         return samples
         return samples
 
 
 
 
@@ -233,7 +229,7 @@ class Gt2YoloTarget(BaseOperator):
                     gi = int(gx * grid_w)
                     gi = int(gx * grid_w)
                     gj = int(gy * grid_h)
                     gj = int(gy * grid_h)
 
 
-                    # gtbox should be regresed in this layes if best match
+                    # gtbox should be regressed in this layer if best match
                     # anchor index in anchor mask of this layer
                     # anchor index in anchor mask of this layer
                     if best_idx in mask:
                     if best_idx in mask:
                         best_n = mask.index(best_idx)
                         best_n = mask.index(best_idx)
@@ -253,7 +249,7 @@ class Gt2YoloTarget(BaseOperator):
                         # classification
                         # classification
                         target[best_n, 6 + cls, gj, gi] = 1.
                         target[best_n, 6 + cls, gj, gi] = 1.
 
 
-                    # For non-matched anchors, calculate the target if the iou
+                    # For non-matched anchors, calculate the target if the iou 
                     # between anchor and gt is larger than iou_thresh
                     # between anchor and gt is larger than iou_thresh
                     if self.iou_thresh < 1:
                     if self.iou_thresh < 1:
                         for idx, mask_i in enumerate(mask):
                         for idx, mask_i in enumerate(mask):
@@ -618,7 +614,7 @@ class Gt2TTFTarget(BaseOperator):
     """
     """
     Gt2TTFTarget
     Gt2TTFTarget
     Generate TTFNet targets by ground truth data
     Generate TTFNet targets by ground truth data
-
+    
     Args:
     Args:
         num_classes(int): the number of classes.
         num_classes(int): the number of classes.
         down_ratio(int): the down ratio from images to heatmap, 4 by default.
         down_ratio(int): the down ratio from images to heatmap, 4 by default.
@@ -980,12 +976,6 @@ class PadMaskBatch(BaseOperator):
                 padding_mask[:im_h, :im_w] = 1.
                 padding_mask[:im_h, :im_w] = 1.
                 data['pad_mask'] = padding_mask
                 data['pad_mask'] = padding_mask
 
 
-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
-                # ploy to rbox
-                polys = data['gt_rbox2poly']
-                rbox = bbox_utils.poly2rbox(polys)
-                data['gt_rbox'] = rbox
-
         return samples
         return samples
 
 
 
 
@@ -994,7 +984,7 @@ class Gt2CenterNetTarget(BaseOperator):
     """Gt2CenterNetTarget
     """Gt2CenterNetTarget
     Genterate CenterNet targets by ground-truth
     Genterate CenterNet targets by ground-truth
     Args:
     Args:
-        down_ratio (int): The down sample ratio between output feature and
+        down_ratio (int): The down sample ratio between output feature and 
                           input image.
                           input image.
         num_classes (int): The number of classes, 80 by default.
         num_classes (int): The number of classes, 80 by default.
         max_objs (int): The maximum objects detected, 128 by default.
         max_objs (int): The maximum objects detected, 128 by default.
@@ -1068,3 +1058,110 @@ class Gt2CenterNetTarget(BaseOperator):
         sample['size'] = wh
         sample['size'] = wh
         sample['offset'] = reg
         sample['offset'] = reg
         return sample
         return sample
+
+
+@register_op
+class PadGT(BaseOperator):
+    """
+    Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
+    The num_max_boxes is the largest for batch.
+    Args:
+        return_gt_mask (bool): If true, return `pad_gt_mask`,
+                                1 means bbox, 0 means no bbox.
+    """
+
+    def __init__(self, return_gt_mask=True):
+        super(PadGT, self).__init__()
+        self.return_gt_mask = return_gt_mask
+
+    def __call__(self, samples, context=None):
+        num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+        for sample in samples:
+            if self.return_gt_mask:
+                sample['pad_gt_mask'] = np.zeros(
+                    (num_max_boxes, 1), dtype=np.float32)
+            if num_max_boxes == 0:
+                continue
+
+            num_gt = len(sample['gt_bbox'])
+            pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
+            pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
+            if num_gt > 0:
+                pad_gt_class[:num_gt] = sample['gt_class']
+                pad_gt_bbox[:num_gt] = sample['gt_bbox']
+            sample['gt_class'] = pad_gt_class
+            sample['gt_bbox'] = pad_gt_bbox
+            # pad_gt_mask
+            if 'pad_gt_mask' in sample:
+                sample['pad_gt_mask'][:num_gt] = 1
+            # gt_score
+            if 'gt_score' in sample:
+                pad_gt_score = np.zeros((num_max_boxes, 1), dtype=np.float32)
+                if num_gt > 0:
+                    pad_gt_score[:num_gt] = sample['gt_score']
+                sample['gt_score'] = pad_gt_score
+            if 'is_crowd' in sample:
+                pad_is_crowd = np.zeros((num_max_boxes, 1), dtype=np.int32)
+                if num_gt > 0:
+                    pad_is_crowd[:num_gt] = sample['is_crowd']
+                sample['is_crowd'] = pad_is_crowd
+            if 'difficult' in sample:
+                pad_diff = np.zeros((num_max_boxes, 1), dtype=np.int32)
+                if num_gt > 0:
+                    pad_diff[:num_gt] = sample['difficult']
+                sample['difficult'] = pad_diff
+        return samples
+
+
+@register_op
+class PadRGT(BaseOperator):
+    """
+    Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
+    The num_max_boxes is the largest for batch.
+    Args:
+        return_gt_mask (bool): If true, return `pad_gt_mask`,
+                                1 means bbox, 0 means no bbox.
+    """
+
+    def __init__(self, return_gt_mask=True):
+        super(PadRGT, self).__init__()
+        self.return_gt_mask = return_gt_mask
+
+    def pad_field(self, sample, field, num_gt):
+        name, shape, dtype = field
+        if name in sample:
+            pad_v = np.zeros(shape, dtype=dtype)
+            if num_gt > 0:
+                pad_v[:num_gt] = sample[name]
+            sample[name] = pad_v
+
+    def __call__(self, samples, context=None):
+        num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+        for sample in samples:
+            if self.return_gt_mask:
+                sample['pad_gt_mask'] = np.zeros(
+                    (num_max_boxes, 1), dtype=np.float32)
+            if num_max_boxes == 0:
+                continue
+
+            num_gt = len(sample['gt_bbox'])
+            pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
+            pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
+            if num_gt > 0:
+                pad_gt_class[:num_gt] = sample['gt_class']
+                pad_gt_bbox[:num_gt] = sample['gt_bbox']
+            sample['gt_class'] = pad_gt_class
+            sample['gt_bbox'] = pad_gt_bbox
+            # pad_gt_mask
+            if 'pad_gt_mask' in sample:
+                sample['pad_gt_mask'][:num_gt] = 1
+            # gt_score
+            names = ['gt_score', 'is_crowd', 'difficult', 'gt_poly', 'gt_rbox']
+            dims = [1, 1, 1, 8, 5]
+            dtypes = [np.float32, np.int32, np.int32, np.float32, np.float32]
+
+            for name, dim, dtype in zip(names, dims, dtypes):
+                self.pad_field(sample, [name, (num_max_boxes, dim), dtype],
+                               num_gt)
+
+        return samples

+ 9 - 9
paddlers/models/ppdet/data/transform/keypoint_operators.py

@@ -511,18 +511,18 @@ class RandomFlipHalfBodyTransform(object):
 
 
 @register_keypointop
 @register_keypointop
 class AugmentationbyInformantionDropping(object):
 class AugmentationbyInformantionDropping(object):
-    """AID: Augmentation by Informantion Dropping. Please refer
-        to https://arxiv.org/abs/2008.07139
-
+    """AID: Augmentation by Informantion Dropping. Please refer 
+        to https://arxiv.org/abs/2008.07139 
+    
     Args:
     Args:
         prob_cutout (float): The probability of the Cutout augmentation.
         prob_cutout (float): The probability of the Cutout augmentation.
         offset_factor (float): Offset factor of cutout center.
         offset_factor (float): Offset factor of cutout center.
-        num_patch (int): Number of patches to be cutout.
+        num_patch (int): Number of patches to be cutout.                       
         records(dict): the dict contained the image and coords
         records(dict): the dict contained the image and coords
-
+        
     Returns:
     Returns:
         records (dict): contain the image and coords after tranformed
         records (dict): contain the image and coords after tranformed
-
+    
     """
     """
 
 
     def __init__(self,
     def __init__(self,
@@ -698,8 +698,8 @@ class ToHeatmapsTopDown(object):
         tmp_size = self.sigma * 3
         tmp_size = self.sigma * 3
         feat_stride = image_size / self.hmsize
         feat_stride = image_size / self.hmsize
         for joint_id in range(num_joints):
         for joint_id in range(num_joints):
-            mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
-            mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
+            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
+            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
             # Check that any part of the gaussian is in-bounds
             # Check that any part of the gaussian is in-bounds
             ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
             ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
             br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
             br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
@@ -796,7 +796,7 @@ class ToHeatmapsTopDown_DARK(object):
 class ToHeatmapsTopDown_UDP(object):
 class ToHeatmapsTopDown_UDP(object):
     """This code is based on:
     """This code is based on:
         https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
         https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
-
+       
         to generate the gaussian heatmaps of keypoint for heatmap loss.
         to generate the gaussian heatmaps of keypoint for heatmap loss.
         ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
         ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
         for Human Pose Estimation (CVPR 2020).
         for Human Pose Estimation (CVPR 2020).

+ 4 - 4
paddlers/models/ppdet/data/transform/mot_operators.py

@@ -132,7 +132,7 @@ class LetterBoxResize(BaseOperator):
 
 
 @register_op
 @register_op
 class MOTRandomAffine(BaseOperator):
 class MOTRandomAffine(BaseOperator):
-    """
+    """ 
     Affine transform to image and coords to achieve the rotate, scale and
     Affine transform to image and coords to achieve the rotate, scale and
     shift effect for training image.
     shift effect for training image.
 
 
@@ -271,7 +271,7 @@ class Gt2JDETargetThres(BaseOperator):
         anchors (list): anchors of JDE model
         anchors (list): anchors of JDE model
         anchor_masks (list): anchor_masks of JDE model
         anchor_masks (list): anchor_masks of JDE model
         downsample_ratios (list): downsample ratios of JDE model
         downsample_ratios (list): downsample ratios of JDE model
-        ide_thresh (float): thresh of identity, higher is groud truth
+        ide_thresh (float): thresh of identity, higher is groud truth 
         fg_thresh (float): thresh of foreground, higher is foreground
         fg_thresh (float): thresh of foreground, higher is foreground
         bg_thresh (float): thresh of background, lower is background
         bg_thresh (float): thresh of background, lower is background
         num_classes (int): number of classes
         num_classes (int): number of classes
@@ -529,8 +529,8 @@ class Gt2FairMOTTarget(Gt2TTFTarget):
     Generate FairMOT targets by ground truth data.
     Generate FairMOT targets by ground truth data.
     Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
     Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
         1. the gaussian kernal radius to generate a heatmap.
         1. the gaussian kernal radius to generate a heatmap.
-        2. the targets needed during traing.
-
+        2. the targets needed during training.
+    
     Args:
     Args:
         num_classes(int): the number of classes.
         num_classes(int): the number of classes.
         down_ratio(int): the down ratio from images to heatmap, 4 by default.
         down_ratio(int): the down ratio from images to heatmap, 4 by default.

+ 500 - 113
paddlers/models/ppdet/data/transform/operators.py

@@ -41,7 +41,6 @@ import threading
 MUTEX = threading.Lock()
 MUTEX = threading.Lock()
 
 
 from paddlers.models.ppdet.core.workspace import serializable
 from paddlers.models.ppdet.core.workspace import serializable
-from paddlers.models.ppdet.modeling import bbox_utils
 from ..reader import Compose
 from ..reader import Compose
 
 
 from .op_helper import (satisfy_sample_constraint, filter_and_process,
 from .op_helper import (satisfy_sample_constraint, filter_and_process,
@@ -123,12 +122,15 @@ class Decode(BaseOperator):
                 sample['image'] = f.read()
                 sample['image'] = f.read()
             sample.pop('im_file')
             sample.pop('im_file')
 
 
-        im = sample['image']
-        data = np.frombuffer(im, dtype='uint8')
-        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
-        if 'keep_ori_im' in sample and sample['keep_ori_im']:
-            sample['ori_image'] = im
-        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+        try:
+            im = sample['image']
+            data = np.frombuffer(im, dtype='uint8')
+            im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
+            if 'keep_ori_im' in sample and sample['keep_ori_im']:
+                sample['ori_image'] = im
+            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+        except:
+            im = sample['image']
 
 
         sample['image'] = im
         sample['image'] = im
         if 'h' not in sample:
         if 'h' not in sample:
@@ -357,19 +359,26 @@ class RandomErasingImage(BaseOperator):
 
 
 @register_op
 @register_op
 class NormalizeImage(BaseOperator):
 class NormalizeImage(BaseOperator):
-    def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
-                 is_scale=True):
+    def __init__(self,
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225],
+                 is_scale=True,
+                 norm_type='mean_std'):
         """
         """
         Args:
         Args:
             mean (list): the pixel mean
             mean (list): the pixel mean
             std (list): the pixel variance
             std (list): the pixel variance
+            is_scale (bool): scale the pixel to [0,1]
+            norm_type (str): type in ['mean_std', 'none']
         """
         """
         super(NormalizeImage, self).__init__()
         super(NormalizeImage, self).__init__()
         self.mean = mean
         self.mean = mean
         self.std = std
         self.std = std
         self.is_scale = is_scale
         self.is_scale = is_scale
+        self.norm_type = norm_type
         if not (isinstance(self.mean, list) and isinstance(self.std, list) and
         if not (isinstance(self.mean, list) and isinstance(self.std, list) and
-                isinstance(self.is_scale, bool)):
+                isinstance(self.is_scale, bool) and
+                self.norm_type in ['mean_std', 'none']):
             raise TypeError("{}: input type is invalid.".format(self))
             raise TypeError("{}: input type is invalid.".format(self))
         from functools import reduce
         from functools import reduce
         if reduce(lambda x, y: x * y, self.std) == 0:
         if reduce(lambda x, y: x * y, self.std) == 0:
@@ -378,20 +387,20 @@ class NormalizeImage(BaseOperator):
     def apply(self, sample, context=None):
     def apply(self, sample, context=None):
         """Normalize the image.
         """Normalize the image.
         Operators:
         Operators:
-            1.(optional) Scale the image to [0,1]
-            2. Each pixel minus mean and is divided by std
+            1.(optional) Scale the pixel to [0,1]
+            2.(optional) Each pixel minus mean and is divided by std
         """
         """
         im = sample['image']
         im = sample['image']
         im = im.astype(np.float32, copy=False)
         im = im.astype(np.float32, copy=False)
-        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
-        std = np.array(self.std)[np.newaxis, np.newaxis, :]
-
         if self.is_scale:
         if self.is_scale:
-            im = im / 255.0
-
-        im -= mean
-        im /= std
-
+            scale = 1.0 / 255.0
+            im *= scale
+
+        if self.norm_type == 'mean_std':
+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
+            im -= mean
+            im /= std
         sample['image'] = im
         sample['image'] = im
         return sample
         return sample
 
 
@@ -448,6 +457,10 @@ class GridMask(BaseOperator):
 @register_op
 @register_op
 class RandomDistort(BaseOperator):
 class RandomDistort(BaseOperator):
     """Random color distortion.
     """Random color distortion.
+    Note:
+        The 'probability' in [lower, upper, probability] is the probability of not using this transformation,
+        not the probability of using this transformation. And this only applies in this operator(RandomDistort),
+        'probability' in other BaseOperator means the probability of using that transformation.
     Args:
     Args:
         hue (list): hue settings. in [lower, upper, probability] format.
         hue (list): hue settings. in [lower, upper, probability] format.
         saturation (list): saturation settings. in [lower, upper, probability] format.
         saturation (list): saturation settings. in [lower, upper, probability] format.
@@ -657,18 +670,6 @@ class RandomFlip(BaseOperator):
         bbox[:, 2] = width - oldx1
         bbox[:, 2] = width - oldx1
         return bbox
         return bbox
 
 
-    def apply_rbox(self, bbox, width):
-        oldx1 = bbox[:, 0].copy()
-        oldx2 = bbox[:, 2].copy()
-        oldx3 = bbox[:, 4].copy()
-        oldx4 = bbox[:, 6].copy()
-        bbox[:, 0] = width - oldx1
-        bbox[:, 2] = width - oldx2
-        bbox[:, 4] = width - oldx3
-        bbox[:, 6] = width - oldx4
-        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
-        return bbox
-
     def apply(self, sample, context=None):
     def apply(self, sample, context=None):
         """Filp the image and bounding box.
         """Filp the image and bounding box.
         Operators:
         Operators:
@@ -700,10 +701,6 @@ class RandomFlip(BaseOperator):
             if 'gt_segm' in sample and sample['gt_segm'].any():
             if 'gt_segm' in sample and sample['gt_segm'].any():
                 sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
                 sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
 
 
-            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
-                sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
-                                                         width)
-
             sample['flipped'] = True
             sample['flipped'] = True
             sample['image'] = im
             sample['image'] = im
         return sample
         return sample
@@ -713,7 +710,7 @@ class RandomFlip(BaseOperator):
 class Resize(BaseOperator):
 class Resize(BaseOperator):
     def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
     def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
         """
         """
-        Resize image to target size. if keep_ratio is True,
+        Resize image to target size. if keep_ratio is True, 
         resize the image's long side to the maximum of target_size
         resize the image's long side to the maximum of target_size
         if keep_ratio is False, resize the image to target size(h, w)
         if keep_ratio is False, resize the image to target size(h, w)
         Args:
         Args:
@@ -824,7 +821,7 @@ class Resize(BaseOperator):
             im_scale_x = resize_w / im_shape[1]
             im_scale_x = resize_w / im_shape[1]
 
 
         im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
         im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
-        sample['image'] = im
+        sample['image'] = im.astype(np.float32)
         sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
         sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
         if 'scale_factor' in sample:
         if 'scale_factor' in sample:
             scale_factor = sample['scale_factor']
             scale_factor = sample['scale_factor']
@@ -841,16 +838,6 @@ class Resize(BaseOperator):
                                                 [im_scale_x, im_scale_y],
                                                 [im_scale_x, im_scale_y],
                                                 [resize_w, resize_h])
                                                 [resize_w, resize_h])
 
 
-        # apply rbox
-        if 'gt_rbox2poly' in sample:
-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
-                logger.warning(
-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
-                    format(len(sample['gt_rbox2poly'])))
-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
-                                                     [im_scale_x, im_scale_y],
-                                                     [resize_w, resize_h])
-
         # apply polygon
         # apply polygon
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -1054,7 +1041,7 @@ class CropWithSampling(BaseOperator):
            [max sample, max trial, min scale, max scale,
            [max sample, max trial, min scale, max scale,
             min aspect ratio, max aspect ratio,
             min aspect ratio, max aspect ratio,
             min overlap, max overlap]
             min overlap, max overlap]
-            avoid_no_bbox (bool): whether to to avoid the
+            avoid_no_bbox (bool): whether to avoid the
                                   situation where the box does not appear.
                                   situation where the box does not appear.
         """
         """
         super(CropWithSampling, self).__init__()
         super(CropWithSampling, self).__init__()
@@ -1145,7 +1132,7 @@ class CropWithDataAchorSampling(BaseOperator):
             das_anchor_scales (list[float]): a list of anchor scales in data
             das_anchor_scales (list[float]): a list of anchor scales in data
                 anchor smapling.
                 anchor smapling.
             min_size (float): minimum size of sampled bbox.
             min_size (float): minimum size of sampled bbox.
-            avoid_no_bbox (bool): whether to to avoid the
+            avoid_no_bbox (bool): whether to avoid the
                                   situation where the box does not appear.
                                   situation where the box does not appear.
         """
         """
         super(CropWithDataAchorSampling, self).__init__()
         super(CropWithDataAchorSampling, self).__init__()
@@ -1504,6 +1491,11 @@ class RandomCrop(BaseOperator):
                 if 'is_crowd' in sample:
                 if 'is_crowd' in sample:
                     sample['is_crowd'] = np.take(
                     sample['is_crowd'] = np.take(
                         sample['is_crowd'], valid_ids, axis=0)
                         sample['is_crowd'], valid_ids, axis=0)
+
+                if 'difficult' in sample:
+                    sample['difficult'] = np.take(
+                        sample['difficult'], valid_ids, axis=0)
+
                 return sample
                 return sample
 
 
         return sample
         return sample
@@ -1604,7 +1596,7 @@ class RandomScaledCrop(BaseOperator):
 @register_op
 @register_op
 class Cutmix(BaseOperator):
 class Cutmix(BaseOperator):
     def __init__(self, alpha=1.5, beta=1.5):
     def __init__(self, alpha=1.5, beta=1.5):
-        """
+        """ 
         CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
         CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
         Cutmix image and gt_bbbox/gt_score
         Cutmix image and gt_bbbox/gt_score
         Args:
         Args:
@@ -1747,7 +1739,7 @@ class Mixup(BaseOperator):
             gt_score2 = np.ones_like(sample[1]['gt_class'])
             gt_score2 = np.ones_like(sample[1]['gt_class'])
             gt_score = np.concatenate(
             gt_score = np.concatenate(
                 (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
                 (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
-            result['gt_score'] = gt_score
+            result['gt_score'] = gt_score.astype('float32')
         if 'is_crowd' in sample[0]:
         if 'is_crowd' in sample[0]:
             is_crowd1 = sample[0]['is_crowd']
             is_crowd1 = sample[0]['is_crowd']
             is_crowd2 = sample[1]['is_crowd']
             is_crowd2 = sample[1]['is_crowd']
@@ -2029,13 +2021,14 @@ class Pad(BaseOperator):
         if self.size:
         if self.size:
             h, w = self.size
             h, w = self.size
             assert (
             assert (
-                im_h < h and im_w < w
+                im_h <= h and im_w <= w
             ), '(h, w) of target size should be greater than (im_h, im_w)'
             ), '(h, w) of target size should be greater than (im_h, im_w)'
         else:
         else:
-            h = np.ceil(im_h / self.size_divisor) * self.size_divisor
-            w = np.ceil(im_w / self.size_divisor) * self.size_divisor
+            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
+            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
 
 
         if h == im_h and w == im_w:
         if h == im_h and w == im_w:
+            sample['image'] = im.astype(np.float32)
             return sample
             return sample
 
 
         if self.pad_mode == -1:
         if self.pad_mode == -1:
@@ -2106,44 +2099,30 @@ class Poly2Mask(BaseOperator):
 
 
 
 
 @register_op
 @register_op
-class Rbox2Poly(BaseOperator):
-    """
-    Convert rbbox format to poly format.
+class AugmentHSV(BaseOperator):
+    """ 
+    Augment the SV channel of image data.
+    Args:
+        fraction (float): the fraction for augment. Default: 0.5.
+        is_bgr (bool): whether the image is BGR mode. Default: True.
+        hgain (float): H channel gains
+        sgain (float): S channel gains
+        vgain (float): V channel gains
     """
     """
 
 
-    def __init__(self):
-        super(Rbox2Poly, self).__init__()
-
-    def apply(self, sample, context=None):
-        assert 'gt_rbox' in sample
-        assert sample['gt_rbox'].shape[1] == 5
-        rrects = sample['gt_rbox']
-        x_ctr = rrects[:, 0]
-        y_ctr = rrects[:, 1]
-        width = rrects[:, 2]
-        height = rrects[:, 3]
-        x1 = x_ctr - width / 2.0
-        y1 = y_ctr - height / 2.0
-        x2 = x_ctr + width / 2.0
-        y2 = y_ctr + height / 2.0
-        sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
-        polys = bbox_utils.rbox2poly_np(rrects)
-        sample['gt_rbox2poly'] = polys
-        return sample
-
-
-@register_op
-class AugmentHSV(BaseOperator):
-    def __init__(self, fraction=0.50, is_bgr=True):
-        """
-        Augment the SV channel of image data.
-        Args:
-            fraction (float): the fraction for augment. Default: 0.5.
-            is_bgr (bool): whether the image is BGR mode. Default: True.
-        """
+    def __init__(self,
+                 fraction=0.50,
+                 is_bgr=True,
+                 hgain=None,
+                 sgain=None,
+                 vgain=None):
         super(AugmentHSV, self).__init__()
         super(AugmentHSV, self).__init__()
         self.fraction = fraction
         self.fraction = fraction
         self.is_bgr = is_bgr
         self.is_bgr = is_bgr
+        self.hgain = hgain
+        self.sgain = sgain
+        self.vgain = vgain
+        self.use_hsvgain = False if hgain is None else True
 
 
     def apply(self, sample, context=None):
     def apply(self, sample, context=None):
         img = sample['image']
         img = sample['image']
@@ -2151,27 +2130,39 @@ class AugmentHSV(BaseOperator):
             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
         else:
         else:
             img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
             img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
-        S = img_hsv[:, :, 1].astype(np.float32)
-        V = img_hsv[:, :, 2].astype(np.float32)
 
 
-        a = (random.random() * 2 - 1) * self.fraction + 1
-        S *= a
-        if a > 1:
-            np.clip(S, a_min=0, a_max=255, out=S)
+        if self.use_hsvgain:
+            hsv_augs = np.random.uniform(
+                -1, 1, 3) * [self.hgain, self.sgain, self.vgain]
+            # random selection of h, s, v
+            hsv_augs *= np.random.randint(0, 2, 3)
+            img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
+            img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
+            img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
+
+        else:
+            S = img_hsv[:, :, 1].astype(np.float32)
+            V = img_hsv[:, :, 2].astype(np.float32)
+
+            a = (random.random() * 2 - 1) * self.fraction + 1
+            S *= a
+            if a > 1:
+                np.clip(S, a_min=0, a_max=255, out=S)
+
+            a = (random.random() * 2 - 1) * self.fraction + 1
+            V *= a
+            if a > 1:
+                np.clip(V, a_min=0, a_max=255, out=V)
 
 
-        a = (random.random() * 2 - 1) * self.fraction + 1
-        V *= a
-        if a > 1:
-            np.clip(V, a_min=0, a_max=255, out=V)
+            img_hsv[:, :, 1] = S.astype(np.uint8)
+            img_hsv[:, :, 2] = V.astype(np.uint8)
 
 
-        img_hsv[:, :, 1] = S.astype(np.uint8)
-        img_hsv[:, :, 2] = V.astype(np.uint8)
         if self.is_bgr:
         if self.is_bgr:
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
         else:
         else:
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
 
 
-        sample['image'] = img
+        sample['image'] = img.astype(np.float32)
         return sample
         return sample
 
 
 
 
@@ -2223,7 +2214,7 @@ class RandomResizeCrop(BaseOperator):
         'long', resize the image's long side to the maximum of target_size, if keep_ratio is
         'long', resize the image's long side to the maximum of target_size, if keep_ratio is
         True and mode is 'short', resize the image's short side to the minimum of target_size.
         True and mode is 'short', resize the image's short side to the minimum of target_size.
         cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
         cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
-        mode (str): resize mode, `long` or `short`. Details see resizes.
+        mode (str): resize mode, `long` or `short`. Details see resizes. 
         prob (float): probability of this op.
         prob (float): probability of this op.
         keep_ratio (bool): whether keep_ratio or not, default true
         keep_ratio (bool): whether keep_ratio or not, default true
         interp (int): the interpolation method
         interp (int): the interpolation method
@@ -2425,16 +2416,6 @@ class RandomResizeCrop(BaseOperator):
                                                 [im_scale_x, im_scale_y],
                                                 [im_scale_x, im_scale_y],
                                                 [resize_w, resize_h])
                                                 [resize_w, resize_h])
 
 
-        # apply rbox
-        if 'gt_rbox2poly' in sample:
-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
-                logger.warn(
-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
-                    format(len(sample['gt_rbox2poly'])))
-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
-                                                     [im_scale_x, im_scale_y],
-                                                     [resize_w, resize_h])
-
         # apply polygon
         # apply polygon
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -2892,7 +2873,7 @@ class FlipWarpAffine(BaseOperator):
         """FlipWarpAffine
         """FlipWarpAffine
         1. Random Crop
         1. Random Crop
         2. Flip the image horizontal
         2. Flip the image horizontal
-        3. Warp affine the image
+        3. Warp affine the image 
         """
         """
         super(FlipWarpAffine, self).__init__()
         super(FlipWarpAffine, self).__init__()
         self.keep_res = keep_res
         self.keep_res = keep_res
@@ -3013,3 +2994,409 @@ class CenterRandColor(BaseOperator):
             img = func(img, img_gray)
             img = func(img, img_gray)
         sample['image'] = img
         sample['image'] = img
         return sample
         return sample
+
+
+@register_op
+class Mosaic(BaseOperator):
+    """ Mosaic operator for image and gt_bboxes
+    The code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/mosaicdetection.py
+
+    1. get mosaic coords
+    2. clip bbox and get mosaic_labels
+    3. random_affine augment
+    4. Mixup augment as copypaste (optional), not used in tiny/nano
+
+    Args:
+        prob (float): probability of using Mosaic, 1.0 as default
+        input_dim (list[int]): input shape; a single integer is expanded
+            to [input_dim, input_dim]
+        degrees (list[2]): the rotate range to apply, transform range is [min, max]
+        translate (list[2]): the translate range to apply, transform range is [min, max]
+        scale (list[2]): the scale range to apply, transform range is [min, max]
+        shear (list[2]): the shear range to apply, transform range is [min, max]
+        enable_mixup (bool): whether to enable Mixup or not
+        mixup_prob (float): probability of using Mixup, 1.0 as default
+        mixup_scale (list[int]): scale range of Mixup
+        remove_outside_box (bool): whether remove outside boxes, False as
+            default in COCO dataset, True in MOT dataset
+    """
+
+    def __init__(self,
+                 prob=1.0,
+                 input_dim=[640, 640],
+                 degrees=[-10, 10],
+                 translate=[-0.1, 0.1],
+                 scale=[0.1, 2],
+                 shear=[-2, 2],
+                 enable_mixup=True,
+                 mixup_prob=1.0,
+                 mixup_scale=[0.5, 1.5],
+                 remove_outside_box=False):
+        super(Mosaic, self).__init__()
+        self.prob = prob
+        # A scalar size means a square input.
+        if isinstance(input_dim, Integral):
+            input_dim = [input_dim, input_dim]
+        self.input_dim = input_dim
+        self.degrees = degrees
+        self.translate = translate
+        self.scale = scale
+        self.shear = shear
+        self.enable_mixup = enable_mixup
+        self.mixup_prob = mixup_prob
+        self.mixup_scale = mixup_scale
+        self.remove_outside_box = remove_outside_box
+
+    def get_mosaic_coords(self, mosaic_idx, xc, yc, w, h, input_h, input_w):
+        """Compute paste/crop rectangles for one of the four mosaic tiles.
+
+        Args:
+            mosaic_idx (int): tile position, 0 top-left, 1 top-right,
+                2 bottom-left, 3 bottom-right. Any other value leaves the
+                coordinates unassigned, so the return statement fails.
+            xc, yc: mosaic center in the large image.
+            w, h: size of the (already resized) small image.
+            input_h, input_w: the large image is bounded by
+                (input_h * 2, input_w * 2).
+
+        Returns:
+            tuple: ((x1, y1, x2, y2), small_coords), the destination
+            rectangle in the large image and the matching source rectangle
+            in the small image.
+        """
+        # (x1, y1, x2, y2) means coords in large image,
+        # small_coords means coords in small image in mosaic aug.
+        if mosaic_idx == 0:
+            # top left
+            x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
+            small_coords = w - (x2 - x1), h - (y2 - y1), w, h
+        elif mosaic_idx == 1:
+            # top right
+            x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
+            small_coords = 0, h - (y2 - y1), min(w, x2 - x1), h
+        elif mosaic_idx == 2:
+            # bottom left
+            x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
+            small_coords = w - (x2 - x1), 0, w, min(y2 - y1, h)
+        elif mosaic_idx == 3:
+            # bottom right
+            x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2,
+                                                                   yc + h)
+            small_coords = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
+
+        return (x1, y1, x2, y2), small_coords
+
+    def random_affine_augment(self,
+                              img,
+                              labels=[],
+                              input_dim=[640, 640],
+                              degrees=[-10, 10],
+                              scales=[0.1, 2],
+                              shears=[-2, 2],
+                              translates=[-0.1, 0.1]):
+        """Apply one random rotate/scale/shear/translate warp to img and boxes.
+
+        Args:
+            img: image to warp.
+            labels: array whose first four columns are box corners
+                (x1, y1, x2, y2); those columns are overwritten in place.
+            input_dim: passed to cv2.warpAffine as dsize and used as the
+                clipping bounds for the warped boxes.
+            degrees, scales, shears, translates: [min, max] sampling ranges.
+
+        Returns:
+            tuple: (warped image, labels).
+        """
+        # random rotation and scale
+        degree = random.uniform(degrees[0], degrees[1])
+        scale = random.uniform(scales[0], scales[1])
+        assert scale > 0, "Argument scale should be positive."
+        # Rotation and scale about the origin, not the image center.
+        R = cv2.getRotationMatrix2D(angle=degree, center=(0, 0), scale=scale)
+        # Placeholder; both rows and the translation column are overwritten below.
+        M = np.ones([2, 3])
+
+        # random shear
+        # A single sampled angle drives both the x and the y shear.
+        shear = random.uniform(shears[0], shears[1])
+        shear_x = math.tan(shear * math.pi / 180)
+        shear_y = math.tan(shear * math.pi / 180)
+        M[0] = R[0] + shear_y * R[1]
+        M[1] = R[1] + shear_x * R[0]
+
+        # random translation
+        # A single sampled factor is used for both axes.
+        translate = random.uniform(translates[0], translates[1])
+        translation_x = translate * input_dim[0]
+        translation_y = translate * input_dim[1]
+        M[0, 2] = translation_x
+        M[1, 2] = translation_y
+
+        # warpAffine
+        # NOTE(review): cv2 takes dsize as (width, height) while __call__
+        # unpacks input_dim as (h, w); this only matters for non-square
+        # input_dim -- confirm the intended order.
+        img = cv2.warpAffine(
+            img, M, dsize=tuple(input_dim), borderValue=(114, 114, 114))
+
+        num_gts = len(labels)
+        if num_gts > 0:
+            # warp corner points
+            corner_points = np.ones((4 * num_gts, 3))
+            corner_points[:, :2] = labels[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+                4 * num_gts, 2)  # x1y1, x2y2, x1y2, x2y1
+            # apply affine transform
+            corner_points = corner_points @M.T
+            corner_points = corner_points.reshape(num_gts, 8)
+
+            # create new boxes
+            # Axis-aligned hull of the four warped corners of each box.
+            corner_xs = corner_points[:, 0::2]
+            corner_ys = corner_points[:, 1::2]
+            new_bboxes = np.concatenate((corner_xs.min(1), corner_ys.min(1),
+                                         corner_xs.max(1), corner_ys.max(1)))
+            new_bboxes = new_bboxes.reshape(4, num_gts).T
+
+            # clip boxes
+            new_bboxes[:, 0::2] = np.clip(new_bboxes[:, 0::2], 0, input_dim[0])
+            new_bboxes[:, 1::2] = np.clip(new_bboxes[:, 1::2], 0, input_dim[1])
+            labels[:, :4] = new_bboxes
+
+        return img, labels
+
+    def __call__(self, sample, context=None):
+        """Build one mosaic sample from five input samples.
+
+        A non-sequence input is returned untouched. Otherwise sample must
+        hold 5 samples: the first 4 are tiled, the last one feeds Mixup.
+        With probability 1 - prob the first sample is returned unchanged.
+        The result is written into (and returned as) sample[0].
+        """
+        if not isinstance(sample, Sequence):
+            return sample
+
+        assert len(
+            sample) == 5, "Mosaic needs 5 samples, 4 for mosaic and 1 for mixup."
+        if np.random.uniform(0., 1.) > self.prob:
+            return sample[0]
+
+        mosaic_gt_bbox, mosaic_gt_class, mosaic_is_crowd, mosaic_difficult = [], [], [], []
+        input_h, input_w = self.input_dim
+        # Mosaic center is drawn from the middle half of the 2x canvas.
+        yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
+        xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
+        # Canvas is twice the input size, pre-filled with gray (114).
+        mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)
+
+        # 1. get mosaic coords
+        for mosaic_idx, sp in enumerate(sample[:4]):
+            img = sp['image']
+            gt_bbox = sp['gt_bbox']
+            h0, w0 = img.shape[:2]
+            # Aspect-preserving resize so the tile fits inside (input_h, input_w).
+            scale = min(1. * input_h / h0, 1. * input_w / w0)
+            img = cv2.resize(
+                img, (int(w0 * scale), int(h0 * scale)),
+                interpolation=cv2.INTER_LINEAR)
+            (h, w, c) = img.shape[:3]
+
+            # suffix l means large image, while s means small image in mosaic aug.
+            (l_x1, l_y1, l_x2, l_y2), (
+                s_x1, s_y1, s_x2, s_y2) = self.get_mosaic_coords(
+                    mosaic_idx, xc, yc, w, h, input_h, input_w)
+
+            mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
+            padw, padh = l_x1 - s_x1, l_y1 - s_y1
+
+            # Scale the boxes with the tile and shift them into canvas coords:
+            # columns 0 and 2 get the x offset, columns 1 and 3 the y offset.
+            _gt_bbox = gt_bbox.copy()
+            if len(gt_bbox) > 0:
+                _gt_bbox[:, 0] = scale * gt_bbox[:, 0] + padw
+                _gt_bbox[:, 1] = scale * gt_bbox[:, 1] + padh
+                _gt_bbox[:, 2] = scale * gt_bbox[:, 2] + padw
+                _gt_bbox[:, 3] = scale * gt_bbox[:, 3] + padh
+
+            mosaic_gt_bbox.append(_gt_bbox)
+            mosaic_gt_class.append(sp['gt_class'])
+            if 'is_crowd' in sp:
+                mosaic_is_crowd.append(sp['is_crowd'])
+            if 'difficult' in sp:
+                mosaic_difficult.append(sp['difficult'])
+
+        # 2. clip bbox and get mosaic_labels([gt_bbox, gt_class, is_crowd])
+        # NOTE(review): mosaic_gt_bbox receives one entry per tile above, so
+        # this test is on the number of tiles, not the number of boxes.
+        if len(mosaic_gt_bbox):
+            mosaic_gt_bbox = np.concatenate(mosaic_gt_bbox, 0)
+            mosaic_gt_class = np.concatenate(mosaic_gt_class, 0)
+            # Column 5 carries is_crowd when any tile had it, otherwise
+            # difficult when any tile had it; never both.
+            if mosaic_is_crowd:
+                mosaic_is_crowd = np.concatenate(mosaic_is_crowd, 0)
+                mosaic_labels = np.concatenate([
+                    mosaic_gt_bbox,
+                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
+                    mosaic_is_crowd.astype(mosaic_gt_bbox.dtype)
+                ], 1)
+            elif mosaic_difficult:
+                mosaic_difficult = np.concatenate(mosaic_difficult, 0)
+                mosaic_labels = np.concatenate([
+                    mosaic_gt_bbox,
+                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
+                    mosaic_difficult.astype(mosaic_gt_bbox.dtype)
+                ], 1)
+            else:
+                mosaic_labels = np.concatenate([
+                    mosaic_gt_bbox, mosaic_gt_class.astype(mosaic_gt_bbox.dtype)
+                ], 1)
+            if self.remove_outside_box:
+                # for MOT dataset
+                # Keep only boxes that overlap the 2x canvas; no clipping.
+                flag1 = mosaic_gt_bbox[:, 0] < 2 * input_w
+                flag2 = mosaic_gt_bbox[:, 2] > 0
+                flag3 = mosaic_gt_bbox[:, 1] < 2 * input_h
+                flag4 = mosaic_gt_bbox[:, 3] > 0
+                flag_all = flag1 * flag2 * flag3 * flag4
+                mosaic_labels = mosaic_labels[flag_all]
+            else:
+                mosaic_labels[:, 0] = np.clip(mosaic_labels[:, 0], 0,
+                                              2 * input_w)
+                mosaic_labels[:, 1] = np.clip(mosaic_labels[:, 1], 0,
+                                              2 * input_h)
+                mosaic_labels[:, 2] = np.clip(mosaic_labels[:, 2], 0,
+                                              2 * input_w)
+                mosaic_labels[:, 3] = np.clip(mosaic_labels[:, 3], 0,
+                                              2 * input_h)
+        else:
+            mosaic_labels = np.zeros((1, 6))
+
+        # 3. random_affine augment
+        mosaic_img, mosaic_labels = self.random_affine_augment(
+            mosaic_img,
+            mosaic_labels,
+            input_dim=self.input_dim,
+            degrees=self.degrees,
+            translates=self.translate,
+            scales=self.scale,
+            shears=self.shear)
+
+        # 4. Mixup augment as copypaste, https://arxiv.org/abs/2012.07177
+        # optional, not used(enable_mixup=False) in tiny/nano
+        if (self.enable_mixup and not len(mosaic_labels) == 0 and
+                random.random() < self.mixup_prob):
+            sample_mixup = sample[4]
+            mixup_img = sample_mixup['image']
+            # Same column layout as mosaic_labels: bbox, class, then
+            # is_crowd or difficult when the mixup sample has one.
+            if 'is_crowd' in sample_mixup:
+                cp_labels = np.concatenate([
+                    sample_mixup['gt_bbox'],
+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
+                    sample_mixup['is_crowd'].astype(mosaic_labels.dtype)
+                ], 1)
+            elif 'difficult' in sample_mixup:
+                cp_labels = np.concatenate([
+                    sample_mixup['gt_bbox'],
+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
+                    sample_mixup['difficult'].astype(mosaic_labels.dtype)
+                ], 1)
+            else:
+                cp_labels = np.concatenate([
+                    sample_mixup['gt_bbox'],
+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype)
+                ], 1)
+            mosaic_img, mosaic_labels = self.mixup_augment(
+                mosaic_img, mosaic_labels, self.input_dim, cp_labels, mixup_img)
+
+        # The first input sample is reused as the output container.
+        sample0 = sample[0]
+        sample0['image'] = mosaic_img.astype(np.uint8)  # can not be float32
+        sample0['h'] = float(mosaic_img.shape[0])
+        sample0['w'] = float(mosaic_img.shape[1])
+        # Requires sample0 to already carry an indexable 'im_shape'.
+        sample0['im_shape'][0] = sample0['h']
+        sample0['im_shape'][1] = sample0['w']
+        sample0['gt_bbox'] = mosaic_labels[:, :4].astype(np.float32)
+        sample0['gt_class'] = mosaic_labels[:, 4:5].astype(np.float32)
+        # NOTE(review): both keys read column 5; when a sample carries both
+        # 'is_crowd' and 'difficult', 'difficult' receives the is_crowd
+        # column -- confirm this is intended.
+        if 'is_crowd' in sample[0]:
+            sample0['is_crowd'] = mosaic_labels[:, 5:6].astype(np.float32)
+        if 'difficult' in sample[0]:
+            sample0['difficult'] = mosaic_labels[:, 5:6].astype(np.float32)
+        return sample0
+
+    def mixup_augment(self, origin_img, origin_labels, input_dim, cp_labels,
+                      img):
+        """Blend a second image into origin_img and append its labels.
+
+        Args:
+            origin_img: the mosaic image to blend into.
+            origin_labels: labels of origin_img.
+            input_dim: size of the gray canvas the mixup image is pasted on.
+            cp_labels: labels of the mixup image; columns 0-3 are the box,
+                column 4 the class and an optional column 5 an extra flag.
+            img: the mixup image.
+
+        Returns:
+            tuple: (blended uint8 image, origin_labels stacked with the
+            transformed mixup labels).
+        """
+        # Random rescale factor and horizontal-flip decision for the mixup image.
+        jit_factor = random.uniform(*self.mixup_scale)
+        FLIP = random.uniform(0, 1) > 0.5
+        if len(img.shape) == 3:
+            cp_img = np.ones(
+                (input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
+        else:
+            cp_img = np.ones(input_dim, dtype=np.uint8) * 114
+
+        # Fit the mixup image into the top-left corner of the gray canvas.
+        cp_scale_ratio = min(input_dim[0] / img.shape[0],
+                             input_dim[1] / img.shape[1])
+        resized_img = cv2.resize(
+            img, (int(img.shape[1] * cp_scale_ratio),
+                  int(img.shape[0] * cp_scale_ratio)),
+            interpolation=cv2.INTER_LINEAR)
+
+        cp_img[:int(img.shape[0] * cp_scale_ratio), :int(img.shape[
+            1] * cp_scale_ratio)] = resized_img
+
+        # Jitter the whole canvas; boxes follow through cp_scale_ratio.
+        cp_img = cv2.resize(cp_img, (int(cp_img.shape[1] * jit_factor),
+                                     int(cp_img.shape[0] * jit_factor)))
+        cp_scale_ratio *= jit_factor
+
+        # NOTE(review): the flip indexes three axes and padded_img below has
+        # 3 channels, so a 2-D mixup image does not look supported here
+        # despite the branch above -- confirm.
+        if FLIP:
+            cp_img = cp_img[:, ::-1, :]
+
+        origin_h, origin_w = cp_img.shape[:2]
+        target_h, target_w = origin_img.shape[:2]
+        # Zero-padded buffer at least as large as both images.
+        padded_img = np.zeros(
+            (max(origin_h, target_h), max(origin_w, target_w), 3),
+            dtype=np.uint8)
+        padded_img[:origin_h, :origin_w] = cp_img
+
+        # Random crop window of the target size inside the padded buffer.
+        x_offset, y_offset = 0, 0
+        if padded_img.shape[0] > target_h:
+            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
+        if padded_img.shape[1] > target_w:
+            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
+        padded_cropped_img = padded_img[y_offset:y_offset + target_h, x_offset:
+                                        x_offset + target_w]
+
+        # adjust boxes
+        cp_bboxes_origin_np = cp_labels[:, :4].copy()
+        cp_bboxes_origin_np[:, 0::2] = np.clip(cp_bboxes_origin_np[:, 0::2] *
+                                               cp_scale_ratio, 0, origin_w)
+        cp_bboxes_origin_np[:, 1::2] = np.clip(cp_bboxes_origin_np[:, 1::2] *
+                                               cp_scale_ratio, 0, origin_h)
+
+        # Mirror x coords; the column reversal swaps x1/x2 so x1 stays the smaller.
+        if FLIP:
+            cp_bboxes_origin_np[:, 0::2] = (
+                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1])
+        cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
+        if self.remove_outside_box:
+            # for MOT dataset
+            cp_bboxes_transformed_np[:, 0::2] -= x_offset
+            cp_bboxes_transformed_np[:, 1::2] -= y_offset
+        else:
+            cp_bboxes_transformed_np[:, 0::2] = np.clip(
+                cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
+            cp_bboxes_transformed_np[:, 1::2] = np.clip(
+                cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)
+
+        cls_labels = cp_labels[:, 4:5].copy()
+        box_labels = cp_bboxes_transformed_np
+        if cp_labels.shape[-1] == 6:
+            crd_labels = cp_labels[:, 5:6].copy()
+            labels = np.hstack((box_labels, cls_labels, crd_labels))
+        else:
+            labels = np.hstack((box_labels, cls_labels))
+        # Unclipped boxes that fall fully outside the crop are dropped.
+        if self.remove_outside_box:
+            labels = labels[labels[:, 0] < target_w]
+            labels = labels[labels[:, 2] > 0]
+            labels = labels[labels[:, 1] < target_h]
+            labels = labels[labels[:, 3] > 0]
+
+        origin_labels = np.vstack((origin_labels, labels))
+        # Equal-weight blend, computed in float32 and cast back to uint8.
+        origin_img = origin_img.astype(np.float32)
+        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(
+            np.float32)
+
+        return origin_img.astype(np.uint8), origin_labels
+
+
+@register_op
+class PadResize(BaseOperator):
+    """ PadResize for image and gt_bbox
+
+    The image is resized with its aspect ratio kept so that it fits inside
+    target_size, then padded on the bottom/right with fill_value.
+
+    Args:
+        target_size (int|list[int]): input shape; a single integer is
+            expanded to [target_size, target_size]
+        fill_value (float): pixel value of padded image
+    """
+
+    def __init__(self, target_size, fill_value=114):
+        super(PadResize, self).__init__()
+        if isinstance(target_size, Integral):
+            target_size = [target_size, target_size]
+        self.target_size = target_size
+        self.fill_value = fill_value
+
+    def _resize(self, img, bboxes, labels):
+        """Resize img to fit target_size and rescale/filter its boxes.
+
+        Boxes whose shorter side is not larger than 1 pixel after scaling
+        are dropped together with their labels. The arrays passed in are
+        left unmodified.
+
+        Returns:
+            tuple: (resized image, kept boxes, kept labels).
+        """
+        ratio = min(self.target_size[0] / img.shape[0],
+                    self.target_size[1] / img.shape[1])
+        w, h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
+        resized_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
+
+        if len(bboxes) > 0:
+            # Scale into a new array: an in-place `*=` would overwrite the
+            # caller's array and fails outright on integer box arrays.
+            bboxes = bboxes * ratio
+            mask = np.minimum(bboxes[:, 2] - bboxes[:, 0],
+                              bboxes[:, 3] - bboxes[:, 1]) > 1
+            bboxes = bboxes[mask]
+            labels = labels[mask]
+        return resized_img, bboxes, labels
+
+    def _pad(self, img):
+        """Pad a 3-channel image on the bottom/right up to target_size.
+
+        An image that already has the target height and width is returned
+        as is; otherwise a new uint8 array is returned.
+        """
+        h, w, _ = img.shape
+        if h == self.target_size[0] and w == self.target_size[1]:
+            return img
+        padded_img = np.full(
+            (self.target_size[0], self.target_size[1], 3),
+            self.fill_value,
+            dtype=np.uint8)
+        padded_img[:h, :w] = img
+        return padded_img
+
+    def apply(self, sample, context=None):
+        """Resize and pad sample['image']; update 'gt_bbox' and 'gt_class'."""
+        image = sample['image']
+        bboxes = sample['gt_bbox']
+        labels = sample['gt_class']
+        image, bboxes, labels = self._resize(image, bboxes, labels)
+        sample['image'] = self._pad(image).astype(np.float32)
+        sample['gt_bbox'] = bboxes
+        sample['gt_class'] = labels
+        return sample

+ 479 - 0
paddlers/models/ppdet/data/transform/rotated_operators.py

@@ -0,0 +1,479 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+
+from numbers import Number, Integral
+
+import cv2
+import numpy as np
+import math
+import copy
+
+from .operators import register_op, BaseOperator
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+@register_op
+class RRotate(BaseOperator):
+    """Rotate an image together with its polygons and boxes.
+
+    Args:
+        scale (float): rotate scale
+        angle (float): rotate angle
+        fill_value (int, tuple): fill color
+        auto_bound (bool): whether auto bound or not
+    """
+
+    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
+        super(RRotate, self).__init__()
+        self.scale, self.angle = scale, angle
+        self.fill_value, self.auto_bound = fill_value, auto_bound
+
+    def get_rotated_matrix(self, angle, scale, h, w):
+        """Return the 2x3 affine matrix and the output (h, w).
+
+        With auto_bound the input size is kept and the matrix is rebuilt
+        with a shrink ratio derived from the rotated extent; without it the
+        matrix is shifted and the rounded rotated extent is returned.
+        """
+        pivot = ((w - 1) * 0.5, (h - 1) * 0.5)
+        affine = cv2.getRotationMatrix2D(pivot, -angle, scale)
+        # extent of the rotated image
+        abs_cos = np.abs(affine[0, 0])
+        abs_sin = np.abs(affine[0, 1])
+        bound_w = h * abs_sin + w * abs_cos
+        bound_h = h * abs_cos + w * abs_sin
+        out_w = int(np.round(bound_w))
+        out_h = int(np.round(bound_h))
+        if not self.auto_bound:
+            # re-center inside the enlarged canvas
+            affine[0, 2] += (bound_w - w) * 0.5
+            affine[1, 2] += (bound_h - h) * 0.5
+            return affine, out_h, out_w
+        fit = min(w / out_w, h / out_h)
+        return cv2.getRotationMatrix2D(pivot, -angle, fit), h, w
+
+    def get_rect_from_pts(self, pts, h, w):
+        """ get minimum rectangle of points, clipped to the (h, w) canvas
+        """
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        xs, ys = pts[:, 0::2], pts[:, 1::2]
+        left = np.clip(np.min(xs, axis=1), 0, w)
+        top = np.clip(np.min(ys, axis=1), 0, h)
+        right = np.clip(np.max(xs, axis=1), 0, w)
+        bottom = np.clip(np.max(ys, axis=1), 0, h)
+        return np.stack([left, top, right, bottom], axis=-1)
+
+    def apply_image(self, image, matrix, h, w):
+        """Warp the image onto a (w, h) canvas, filling with fill_value."""
+        warped = cv2.warpAffine(
+            image, matrix, (w, h), borderValue=self.fill_value)
+        return warped
+
+    def apply_pts(self, pts, matrix, h, w):
+        """Apply the affine matrix to rows of flattened (x, y) points."""
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        # each row holds several points, i.e. twice as many values
+        _, row_len = pts.shape
+        xy = pts.reshape(-1, 2).T
+        # homogeneous coordinates: append a row of ones
+        ones_row = np.ones((1, xy.shape[1]), pts.dtype)
+        homogeneous = np.concatenate((xy, ones_row), axis=0)
+        moved = np.matmul(matrix, homogeneous)
+        return moved[:2, :].T.reshape(-1, row_len)
+
+    def apply(self, sample, context=None):
+        """Rotate sample['image'] and, when present, its polygons/boxes."""
+        image = sample['image']
+        src_h, src_w = image.shape[:2]
+        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, src_h,
+                                               src_w)
+        sample['image'] = self.apply_image(image, matrix, h, w)
+        polys = sample['gt_poly']
+        # TODO: segment or keypoint to be processed
+        if len(polys) == 0:
+            return sample
+        pts = self.apply_pts(polys, matrix, h, w)
+        sample['gt_poly'] = pts
+        sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
+        return sample
+
+
+@register_op
+class RandomRRotate(BaseOperator):
+    """Rotate an image by a randomly drawn angle and scale.
+
+    Args:
+        scale (float, tuple, list): rotate scale
+        scale_mode (str): mode of scale, [range, value, None]
+        angle (float, tuple, list): rotate angle
+        angle_mode (str): mode of angle, [range, value, None]
+        fill_value (float, tuple, list): fill value
+        rotate_prob (float): probability of rotation
+        auto_bound (bool): whether auto bound or not
+    """
+
+    def __init__(self,
+                 scale=1.0,
+                 scale_mode=None,
+                 angle=0.,
+                 angle_mode=None,
+                 fill_value=0.,
+                 rotate_prob=1.0,
+                 auto_bound=True):
+        super(RandomRRotate, self).__init__()
+        self.scale, self.scale_mode = scale, scale_mode
+        self.angle, self.angle_mode = angle, angle_mode
+        self.fill_value = fill_value
+        self.rotate_prob = rotate_prob
+        self.auto_bound = auto_bound
+
+    def get_angle(self, angle, angle_mode):
+        """Pick the angle: as given, uniform in a range, or one of a list."""
+        assert not angle_mode or angle_mode in [
+            'range', 'value'
+        ], 'angle mode should be in [range, value, None]'
+        if not angle_mode:
+            return angle
+        if angle_mode == 'range':
+            lo, hi = angle
+            return np.random.rand() * (hi - lo) + lo
+        if angle_mode == 'value':
+            return np.random.choice(angle)
+
+    def get_scale(self, scale, scale_mode):
+        """Pick the scale: as given, uniform in a range, or one of a list."""
+        assert not scale_mode or scale_mode in [
+            'range', 'value'
+        ], 'scale mode should be in [range, value, None]'
+        if not scale_mode:
+            return scale
+        if scale_mode == 'range':
+            lo, hi = scale
+            return np.random.rand() * (hi - lo) + lo
+        if scale_mode == 'value':
+            return np.random.choice(scale)
+
+    def apply(self, sample, context=None):
+        """Rotate the sample with probability rotate_prob."""
+        skip = np.random.rand() > self.rotate_prob
+        if skip:
+            return sample
+
+        # angle is drawn before scale
+        picked_angle = self.get_angle(self.angle, self.angle_mode)
+        picked_scale = self.get_scale(self.scale, self.scale_mode)
+        return RRotate(picked_scale, picked_angle, self.fill_value,
+                       self.auto_bound)(sample)
+
+
+@register_op
+class Poly2RBox(BaseOperator):
+    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1
+
+    Args:
+        filter_threshold (int, float): threshold to filter annotations
+        filter_mode (str): filter mode, ['area', 'edge']
+        rbox_type (str): rbox type, ['le135', 'oc']; any value other than
+            'le135' selects the 'oc' conversion
+
+    """
+
+    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
+        super(Poly2RBox, self).__init__()
+        if rbox_type == 'le135':
+            self.rbox_fn = poly2rbox_le135_np
+        else:
+            self.rbox_fn = poly2rbox_oc_np
+        self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
+
+    def filter(self, size, threshold, mode):
+        """Return True when a (w, h) size should be discarded."""
+        if mode == 'area':
+            too_small = size[0] * size[1] < threshold
+        elif mode == 'edge':
+            too_small = min(size) < threshold
+        else:
+            return False
+        return True if too_small else False
+
+    def get_rbox(self, polys):
+        """Convert polygons to rotated boxes plus axis-aligned boxes.
+
+        Returns:
+            tuple: (rboxes, bboxes, valid_ids) where valid_ids lists the
+            indices of the polygons that survived the size filter.
+        """
+        kept, rbox_rows, bbox_rows = [], [], []
+        for idx, poly in enumerate(polys):
+            cx, cy, w, h, angle = self.rbox_fn(poly)
+            if not self.filter_fn((w, h)):
+                rbox_rows.append(
+                    np.array(
+                        [cx, cy, w, h, angle], dtype=np.float32))
+                kept.append(idx)
+                xs, ys = poly[0::2], poly[1::2]
+                bbox_rows.append(
+                    np.array(
+                        [min(xs), min(ys), max(xs), max(ys)],
+                        dtype=np.float32))
+
+        if kept:
+            return np.stack(rbox_rows), np.stack(bbox_rows), kept
+        empty_rboxes = np.zeros((0, 5), dtype=np.float32)
+        empty_bboxes = np.zeros((0, 4), dtype=np.float32)
+        return empty_rboxes, empty_bboxes, kept
+
+    def apply(self, sample, context=None):
+        """Write 'gt_rbox'/'gt_bbox' and filter the per-object fields."""
+        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
+        sample['gt_rbox'] = rboxes
+        sample['gt_bbox'] = bboxes
+        per_object_keys = ('gt_class', 'gt_score', 'gt_poly', 'is_crowd',
+                           'difficult')
+        for key in per_object_keys:
+            if key not in sample:
+                continue
+            sample[key] = sample[key][valid_ids]
+
+        return sample
+
+
+@register_op
+class Poly2Array(BaseOperator):
+    """ Turn gt_poly into a float32 array with 8 values per row,
+    for rotated bboxes
+    """
+
+    def __init__(self):
+        super(Poly2Array, self).__init__()
+
+    def apply(self, sample, context=None):
+        """Convert sample['gt_poly'] in place when the key is present."""
+        if 'gt_poly' not in sample:
+            return sample
+        polys = np.array(sample['gt_poly'], dtype=np.float32)
+        sample['gt_poly'] = polys.reshape((-1, 8))
+        return sample
+
+
+@register_op
+class RResize(BaseOperator):
+    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
+        """
+        Resize image to target size. if keep_ratio is True,
+        resize the image's long side to the maximum of target_size
+        if keep_ratio is False, resize the image to target size(h, w)
+        Args:
+            target_size (int|list): image target size; a single integer is
+                expanded to [target_size, target_size]
+            keep_ratio (bool): whether keep_ratio or not (required, no default)
+            interp (int): the interpolation method
+        Raises:
+            TypeError: if target_size is neither an integer nor a sequence
+        """
+        super(RResize, self).__init__()
+        self.keep_ratio = keep_ratio
+        self.interp = interp
+        if not isinstance(target_size, (Integral, Sequence)):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
+                format(type(target_size)))
+        if isinstance(target_size, Integral):
+            target_size = [target_size, target_size]
+        self.target_size = target_size
+
+    def apply_image(self, image, scale):
+        """Resize the image by the (x, y) scale factors."""
+        im_scale_x, im_scale_y = scale
+
+        return cv2.resize(
+            image,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=self.interp)
+
+    def apply_pts(self, pts, scale, size):
+        """Scale flattened (x, y) points and clip them to (resize_w, resize_h).
+
+        The pts array is modified in place and also returned.
+        """
+        im_scale_x, im_scale_y = scale
+        resize_w, resize_h = size
+        pts[:, 0::2] *= im_scale_x
+        pts[:, 1::2] *= im_scale_y
+        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
+        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
+        return pts
+
+    def apply(self, sample, context=None):
+        """ Resize the image numpy.
+
+        Updates 'image', 'im_shape' and 'scale_factor', and rescales
+        'gt_bbox' / 'gt_poly' when they are present and non-empty.
+
+        Raises:
+            TypeError: if sample['image'] is not a numpy array
+            ImageError: if sample['image'] is not 3-dimensional
+        """
+        im = sample['image']
+        if not isinstance(im, np.ndarray):
+            raise TypeError("{}: image type is not numpy.".format(self))
+        if len(im.shape) != 3:
+            # ImageError is not among this module's top-level imports, so
+            # it is imported here; without this the raise is a NameError.
+            from .operators import ImageError
+            raise ImageError('{}: image is not 3-dimensional.'.format(self))
+
+        # apply image
+        im_shape = im.shape
+        if self.keep_ratio:
+
+            im_size_min = np.min(im_shape[0:2])
+            im_size_max = np.max(im_shape[0:2])
+
+            target_size_min = np.min(self.target_size)
+            target_size_max = np.max(self.target_size)
+
+            # Largest uniform scale that keeps both sides within the target.
+            im_scale = min(target_size_min / im_size_min,
+                           target_size_max / im_size_max)
+
+            resize_h = im_scale * float(im_shape[0])
+            resize_w = im_scale * float(im_shape[1])
+
+            im_scale_x = im_scale
+            im_scale_y = im_scale
+        else:
+            resize_h, resize_w = self.target_size
+            im_scale_y = resize_h / im_shape[0]
+            im_scale_x = resize_w / im_shape[1]
+
+        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
+        sample['image'] = im.astype(np.float32)
+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
+        # An existing scale_factor is compounded, stored as (y, x).
+        if 'scale_factor' in sample:
+            scale_factor = sample['scale_factor']
+            sample['scale_factor'] = np.asarray(
+                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
+                dtype=np.float32)
+        else:
+            sample['scale_factor'] = np.asarray(
+                [im_scale_y, im_scale_x], dtype=np.float32)
+
+        # apply bbox
+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
+                                               [im_scale_x, im_scale_y],
+                                               [resize_w, resize_h])
+
+        # apply polygon
+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
+                                               [im_scale_x, im_scale_y],
+                                               [resize_w, resize_h])
+
+        return sample
+
+
+@register_op
+class RandomRFlip(BaseOperator):
+    """Randomly mirror the image, boxes and polygons left-to-right."""
+
+    def __init__(self, prob=0.5):
+        """
+        Args:
+            prob (float): the probability of flipping image
+
+        Raises:
+            TypeError: if ``prob`` is not an int or float.
+        """
+        super(RandomRFlip, self).__init__()
+        self.prob = prob
+        # Plain ints are accepted so that prob=0 / prob=1 work; bool is
+        # still rejected, as it was by the former float-only check.
+        if isinstance(self.prob, bool) or not isinstance(self.prob,
+                                                          (int, float)):
+            raise TypeError("{}: input type is invalid.".format(self))
+
+    def apply_image(self, image):
+        """Return a view of ``image`` with its second axis reversed."""
+        return image[:, ::-1, :]
+
+    def apply_pts(self, pts, width):
+        """Mirror the x columns (even indices) of ``pts`` in place.
+
+        Each x value becomes ``width - x - 1``; y columns are untouched.
+        """
+        # The right-hand side is a new array, so reading and writing the
+        # same columns is safe.
+        pts[:, 0::2] = width - pts[:, 0::2] - 1
+        return pts
+
+    def apply(self, sample, context=None):
+        """Flip the image and bounding box.
+        Operators:
+            1. Flip the image numpy.
+            2. Transform the bboxes' x coordinates.
+            3. Transform the segmentations' x coordinates.
+        Coordinates are mirrored as ``width - x - 1``.
+        Output:
+            sample: the image, bounding box and segmentation part
+                    in sample are flipped, and ``sample['flipped']`` is
+                    set to True. The sample is returned unchanged when
+                    the random draw is not below ``prob``.
+        """
+        if np.random.uniform(0, 1) < self.prob:
+            im = sample['image']
+            width = im.shape[1]
+            im = self.apply_image(im)
+            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+                sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
+            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
+
+            sample['flipped'] = True
+            sample['image'] = im
+        return sample
+
+
+@register_op
+class VisibleRBox(BaseOperator):
+    """
+    In debug mode, visualize images according to `gt_poly`.
+    (Currently only supported when not cropping and flipping image.)
+
+    Args:
+        output_dir (str): directory the rendered images are written to;
+            created if missing.
+        is_normalized (bool): whether `gt_keypoint` values are stored
+            relative to the image size and must be multiplied by
+            `sample['w']` / `sample['h']` before drawing.
+    """
+
+    def __init__(self, output_dir='debug', is_normalized=False):
+        super(VisibleRBox, self).__init__()
+        self.output_dir = output_dir
+        # apply() reads this flag; it used to be missing, which raised
+        # AttributeError for every sample carrying `gt_keypoint`.
+        self.is_normalized = is_normalized
+        # exist_ok avoids the check-then-create race between workers.
+        os.makedirs(output_dir, exist_ok=True)
+
+    @staticmethod
+    def _text_size(draw, text):
+        """Return the (width, height) of `text` for the given drawer."""
+        # ImageDraw.textsize is gone in recent Pillow releases; keep using
+        # it where it exists and fall back to textbbox otherwise.
+        if hasattr(draw, 'textsize'):
+            return draw.textsize(text)
+        left, top, right, bottom = draw.textbbox((0, 0), text)
+        return right - left, bottom - top
+
+    def apply(self, sample, context=None):
+        """Draw polygons, class labels and keypoints, then save a JPEG.
+
+        The file is named after `sample['im_id'][0]` (zero padded to 12
+        digits) and stored in `self.output_dir`. The sample is returned
+        unmodified.
+        """
+        image = Image.fromarray(sample['image'].astype(np.uint8))
+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
+        width = sample['w']
+        height = sample['h']
+        gt_poly = sample['gt_poly']
+        gt_class = sample['gt_class']
+        draw = ImageDraw.Draw(image)
+        for i in range(gt_poly.shape[0]):
+            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill='green')
+            # draw label
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
+            text = str(gt_class[i][0])
+            tw, th = self._text_size(draw, text)
+            draw.rectangle(
+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+
+        if 'gt_keypoint' in sample:
+            gt_keypoint = sample['gt_keypoint']
+            if self.is_normalized:
+                # Scale a copy so this debug operator never alters the
+                # keypoints seen by later operators.
+                gt_keypoint = gt_keypoint.copy()
+                gt_keypoint[:, 0::2] = gt_keypoint[:, 0::2] * width
+                gt_keypoint[:, 1::2] = gt_keypoint[:, 1::2] * height
+            for keypoint in gt_keypoint:
+                for j in range(keypoint.shape[0] // 2):
+                    # round() may return a plain int, which has no
+                    # .astype(); convert explicitly instead.
+                    x1 = int(round(float(keypoint[2 * j])))
+                    y1 = int(round(float(keypoint[2 * j + 1])))
+                    draw.ellipse(
+                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
+        save_path = os.path.join(self.output_dir, out_file_name)
+        image.save(save_path, quality=95)
+        return sample
+
+
+@register_op
+class Rbox2Poly(BaseOperator):
+    """
+    Convert rbbox format to poly format.
+
+    Reads `sample['gt_rbox']` (N rows of 5 values), stores the converted
+    polygons in `sample['gt_poly']` and their axis-aligned bounding boxes
+    `[xmin, ymin, xmax, ymax]` in `sample['gt_bbox']`.
+    """
+
+    def __init__(self):
+        super(Rbox2Poly, self).__init__()
+
+    def apply(self, sample, context=None):
+        """Add `gt_poly` and the enclosing `gt_bbox` to the sample."""
+        assert 'gt_rbox' in sample
+        assert sample['gt_rbox'].shape[1] == 5
+        rboxes = sample['gt_rbox']
+        polys = rbox2poly_np(rboxes)
+        sample['gt_poly'] = polys
+        # Even columns hold x values, odd columns hold y values.
+        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
+        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
+        # The box must end at the maxima; repeating the minima produced
+        # zero-area boxes.
+        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
+        return sample

+ 72 - 0
paddlers/models/ppdet/data/utils.py

@@ -0,0 +1,72 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import numbers
+import numpy as np
+
+try:
+    from collections.abc import Sequence, Mapping
+except:
+    from collections import Sequence, Mapping
+
+
+def default_collate_fn(batch):
+    """
+    Default batch collating function for :code:`paddle.io.DataLoader`.
+
+    The input is a list of samples, where each sample may be composed of
+    lists, dictionaries, strings, numbers and numpy arrays. The samples
+    are parsed recursively: numpy arrays are stacked along a new leading
+    axis, numbers are gathered into a numpy array, and strings / bytes
+    are returned as they were passed in. e.g. for the following input:
+    [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 3},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 4},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
+
+    each field is zipped across samples and stacked into a batch field:
+    {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
+
+    Args:
+        batch(list of sample data): batch should be a non-empty list of
+            sample data.
+
+    Returns:
+        Batched data: a structure mirroring one sample, with every number
+                      and numpy array field batched.
+
+    Raises:
+        RuntimeError: if sequence samples differ in length.
+        TypeError: if a sample has an unsupported type.
+    """
+    # The first sample decides how the whole batch is collated.
+    sample = batch[0]
+    if isinstance(sample, np.ndarray):
+        return np.stack(batch, axis=0)
+    if isinstance(sample, numbers.Number):
+        return np.array(batch)
+    # str / bytes are Sequences too, so they must be handled before the
+    # generic Sequence branch.
+    if isinstance(sample, (str, bytes)):
+        return batch
+    if isinstance(sample, Mapping):
+        return {
+            key: default_collate_fn([d[key] for d in batch])
+            for key in sample
+        }
+    if isinstance(sample, Sequence):
+        sample_fields_num = len(sample)
+        if not all(len(item) == sample_fields_num for item in batch):
+            raise RuntimeError(
+                "number of fields is not the same among samples in a batch")
+        return [default_collate_fn(fields) for fields in zip(*batch)]
+
+    raise TypeError("batch data can only contain: numpy.ndarray, dict, "
+                    "list, number, str, but got {}".format(type(sample)))

+ 12 - 12
paddlers/models/ppdet/engine/__init__.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from . import trainer
 from . import trainer

+ 179 - 18
paddlers/models/ppdet/engine/callbacks.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -182,7 +182,7 @@ class Checkpointer(Callback):
                 ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                 ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                     save_name = str(
                     save_name = str(
                         epoch_id) if epoch_id != end_epoch - 1 else "model_final"
                         epoch_id) if epoch_id != end_epoch - 1 else "model_final"
-                    weight = self.weight
+                    weight = self.weight.state_dict()
             elif mode == 'eval':
             elif mode == 'eval':
                 if 'save_best_model' in status and status['save_best_model']:
                 if 'save_best_model' in status and status['save_best_model']:
                     for metric in self.model._metrics:
                     for metric in self.model._metrics:
@@ -198,15 +198,25 @@ class Checkpointer(Callback):
                                         "training iterations being too few or not " \
                                         "training iterations being too few or not " \
                                         "loading the correct weights.")
                                         "loading the correct weights.")
                             return
                             return
-                        if map_res[key][0] > self.best_ap:
+                        if map_res[key][0] >= self.best_ap:
                             self.best_ap = map_res[key][0]
                             self.best_ap = map_res[key][0]
                             save_name = 'best_model'
                             save_name = 'best_model'
-                            weight = self.weight
+                            weight = self.weight.state_dict()
                         logger.info("Best test {} ap is {:0.3f}.".format(
                         logger.info("Best test {} ap is {:0.3f}.".format(
                             key, self.best_ap))
                             key, self.best_ap))
             if weight:
             if weight:
-                save_model(weight, self.model.optimizer, self.save_dir,
-                           save_name, epoch_id + 1)
+                if self.model.use_ema:
+                    # save model and ema_model
+                    save_model(
+                        status['weight'],
+                        self.model.optimizer,
+                        self.save_dir,
+                        save_name,
+                        epoch_id + 1,
+                        ema_model=weight)
+                else:
+                    save_model(weight, self.model.optimizer, self.save_dir,
+                               save_name, epoch_id + 1)
 
 
 
 
 class WiferFaceEval(Callback):
 class WiferFaceEval(Callback):
@@ -251,7 +261,7 @@ class VisualDLWriter(Callback):
                 for loss_name, loss_value in training_staus.get().items():
                 for loss_name, loss_value in training_staus.get().items():
                     self.vdl_writer.add_scalar(loss_name, loss_value,
                     self.vdl_writer.add_scalar(loss_name, loss_value,
                                                self.vdl_loss_step)
                                                self.vdl_loss_step)
-                    self.vdl_loss_step += 1
+                self.vdl_loss_step += 1
             elif mode == 'test':
             elif mode == 'test':
                 ori_image = status['original_image']
                 ori_image = status['original_image']
                 result_image = status['result_image']
                 result_image = status['result_image']
@@ -279,6 +289,157 @@ class VisualDLWriter(Callback):
                 self.vdl_mAP_step += 1
                 self.vdl_mAP_step += 1
 
 
 
 
+class WandbCallback(Callback):
+    """Log training metrics, evaluation results and saved checkpoints to
+    Weights & Biases.
+
+    Arguments for `wandb.init` come from the `wandb` entry of the model
+    config plus every top-level config key starting with `wandb_` (the
+    prefix is removed). All logging happens only on the main process
+    (single process, or rank 0).
+    """
+
+    def __init__(self, model):
+        super(WandbCallback, self).__init__(model)
+
+        try:
+            import wandb
+            self.wandb = wandb
+        except Exception as e:
+            logger.error('wandb not found, please install wandb. '
+                         'Use: `pip install wandb`.')
+            raise e
+
+        self.wandb_params = model.cfg.get('wandb', None)
+        self.save_dir = os.path.join(self.model.cfg.save_dir,
+                                     self.model.cfg.filename)
+        if self.wandb_params is None:
+            self.wandb_params = {}
+        prefix = "wandb_"
+        for k, v in model.cfg.items():
+            if k.startswith(prefix):
+                # Slice the prefix off. str.lstrip() removes a *set of
+                # characters*, which turned e.g. "wandb_name" into "me".
+                self.wandb_params.update({k[len(prefix):]: v})
+
+        self._run = None
+        if self._is_main_process():
+            _ = self.run
+            self.run.config.update(self.model.cfg)
+            self.run.define_metric("epoch")
+            self.run.define_metric("eval/*", step_metric="epoch")
+
+        self.best_ap = 0
+
+    @staticmethod
+    def _is_main_process():
+        """Return True when not distributed or when running as rank 0."""
+        return dist.get_world_size() < 2 or dist.get_rank() == 0
+
+    @property
+    def run(self):
+        """The wandb run, created (or adopted if one is active) on first use."""
+        if self._run is None:
+            if self.wandb.run is not None:
+                logger.info(
+                    "There is an ongoing wandb run which will be used "
+                    "for logging. Please use `wandb.finish()` to end that "
+                    "if the behaviour is not intended")
+                self._run = self.wandb.run
+            else:
+                self._run = self.wandb.init(**self.wandb_params)
+        return self._run
+
+    def save_model(self,
+                   optimizer,
+                   save_dir,
+                   save_name,
+                   last_epoch,
+                   ema_model=None,
+                   ap=None,
+                   tags=None):
+        """Upload an already saved checkpoint as wandb artifact(s).
+
+        Args:
+            optimizer: unused; kept for signature compatibility.
+            save_dir (str): directory containing the checkpoint files.
+            save_name (str): checkpoint file name without extension.
+            last_epoch (int): stored in the artifact metadata.
+            ema_model: truthy when an EMA checkpoint (`.pdema`) was saved
+                next to the `.pdparams` file and should be uploaded too.
+            ap (float|None): optional metric stored in the metadata.
+            tags (list|None): aliases attached to the artifact(s).
+        """
+        if not self._is_main_process():
+            return
+        model_path = os.path.join(save_dir, save_name)
+        metadata = {"last_epoch": last_epoch}
+        # `is not None` keeps a legitimate AP of 0 in the metadata.
+        if ap is not None:
+            metadata["ap"] = ap
+
+        # Callers pass the `use_ema` flag here, so the EMA artifact must be
+        # uploaded when the value is truthy (the former `is None` test was
+        # inverted and pointed at a `.pdema` file that does not exist).
+        if ema_model:
+            ema_artifact = self.wandb.Artifact(
+                name="ema_model-{}".format(self.run.id),
+                type="model",
+                metadata=metadata)
+            ema_artifact.add_file(model_path + ".pdema", name="model_ema")
+            self.run.log_artifact(ema_artifact, aliases=tags)
+
+        model_artifact = self.wandb.Artifact(
+            name="model-{}".format(self.run.id),
+            type="model",
+            metadata=metadata)
+        model_artifact.add_file(model_path + ".pdparams", name="model")
+        self.run.log_artifact(model_artifact, aliases=tags)
+
+    def on_step_end(self, status):
+        """Log the current training statistics under the `train/` prefix."""
+        mode = status['mode']
+        if self._is_main_process():
+            if mode == 'train':
+                training_status = status['training_staus'].get()
+                for k, v in training_status.items():
+                    training_status[k] = float(v)
+                metrics = {"train/" + k: v for k, v in training_status.items()}
+                self.run.log(metrics)
+
+    def on_epoch_end(self, status):
+        """Upload snapshot / best checkpoints and log evaluation metrics."""
+        mode = status['mode']
+        epoch_id = status['epoch_id']
+        save_name = None
+        if self._is_main_process():
+            if mode == 'train':
+                end_epoch = self.model.cfg.epoch
+                if (
+                        epoch_id + 1
+                ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
+                    save_name = str(
+                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
+                    tags = ["latest", "epoch_{}".format(epoch_id)]
+                    self.save_model(
+                        self.model.optimizer,
+                        self.save_dir,
+                        save_name,
+                        epoch_id + 1,
+                        self.model.use_ema,
+                        tags=tags)
+            if mode == 'eval':
+                merged_dict = {}
+                for metric in self.model._metrics:
+                    for key, map_value in metric.get_results().items():
+                        merged_dict["eval/{}-mAP".format(key)] = map_value[0]
+                merged_dict["epoch"] = status["epoch_id"]
+                self.run.log(merged_dict)
+
+                if 'save_best_model' in status and status['save_best_model']:
+                    for metric in self.model._metrics:
+                        map_res = metric.get_results()
+                        if 'bbox' in map_res:
+                            key = 'bbox'
+                        elif 'keypoint' in map_res:
+                            key = 'keypoint'
+                        else:
+                            key = 'mask'
+                        if key not in map_res:
+                            logger.warning("Evaluation results empty, this may be due to " \
+                                        "training iterations being too few or not " \
+                                        "loading the correct weights.")
+                            return
+                        if map_res[key][0] >= self.best_ap:
+                            self.best_ap = map_res[key][0]
+                            save_name = 'best_model'
+                            tags = ["best", "epoch_{}".format(epoch_id)]
+
+                            self.save_model(
+                                self.model.optimizer,
+                                self.save_dir,
+                                save_name,
+                                last_epoch=epoch_id + 1,
+                                ema_model=self.model.use_ema,
+                                ap=self.best_ap,
+                                tags=tags)
+
+    def on_train_end(self, status):
+        """Finish the wandb run on the main process."""
+        # Accessing `self.run` elsewhere would lazily create a new wandb
+        # run on ranks that never logged anything.
+        if self._is_main_process():
+            self.run.finish()
+
+
 class SniperProposalsGenerator(Callback):
 class SniperProposalsGenerator(Callback):
     def __init__(self, model):
     def __init__(self, model):
         super(SniperProposalsGenerator, self).__init__(model)
         super(SniperProposalsGenerator, self).__init__(model)

+ 12 - 12
paddlers/models/ppdet/engine/env.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 44 - 20
paddlers/models/ppdet/engine/export_utils.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -41,22 +41,26 @@ TRT_MIN_SUBGRAPH = {
     'HigherHRNet': 3,
     'HigherHRNet': 3,
     'HRNet': 3,
     'HRNet': 3,
     'DeepSORT': 3,
     'DeepSORT': 3,
+    'ByteTrack': 10,
     'JDE': 10,
     'JDE': 10,
     'FairMOT': 5,
     'FairMOT': 5,
     'GFL': 16,
     'GFL': 16,
     'PicoDet': 3,
     'PicoDet': 3,
     'CenterNet': 5,
     'CenterNet': 5,
     'TOOD': 5,
     'TOOD': 5,
+    'YOLOX': 8,
 }
 }
 
 
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
 
 
 
 
 def _prune_input_spec(input_spec, program, targets):
 def _prune_input_spec(input_spec, program, targets):
     # try to prune static program to figure out pruned input spec
     # try to prune static program to figure out pruned input spec
     # so we perform following operations in static mode
     # so we perform following operations in static mode
+    device = paddle.get_device()
     paddle.enable_static()
     paddle.enable_static()
+    paddle.set_device(device)
     pruned_input_spec = [{}]
     pruned_input_spec = [{}]
     program = program.clone()
     program = program.clone()
     program = program._prune(targets=targets)
     program = program._prune(targets=targets)
@@ -67,7 +71,7 @@ def _prune_input_spec(input_spec, program, targets):
             pruned_input_spec[0][name] = spec
             pruned_input_spec[0][name] = spec
         except Exception:
         except Exception:
             pass
             pass
-    paddle.disable_static()
+    paddle.disable_static(place=device)
     return pruned_input_spec
     return pruned_input_spec
 
 
 
 
@@ -88,6 +92,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
             if key == 'Resize':
             if key == 'Resize':
                 if int(image_shape[1]) != -1:
                 if int(image_shape[1]) != -1:
                     value['target_size'] = image_shape[1:]
                     value['target_size'] = image_shape[1:]
+                value['interp'] = value.get('interp', 1)  # cv2.INTER_LINEAR
             if fuse_normalize and key == 'NormalizeImage':
             if fuse_normalize and key == 'NormalizeImage':
                 continue
                 continue
             p.update(value)
             p.update(value)
@@ -120,12 +125,20 @@ def _dump_infer_config(config, path, image_shape, model):
     setup_orderdict()
     setup_orderdict()
     use_dynamic_shape = True if image_shape[2] == -1 else False
     use_dynamic_shape = True if image_shape[2] == -1 else False
     infer_cfg = OrderedDict({
     infer_cfg = OrderedDict({
-        'mode': 'fluid',
+        'mode': 'paddle',
         'draw_threshold': 0.5,
         'draw_threshold': 0.5,
         'metric': config['metric'],
         'metric': config['metric'],
         'use_dynamic_shape': use_dynamic_shape
         'use_dynamic_shape': use_dynamic_shape
     })
     })
+    export_onnx = config.get('export_onnx', False)
+    export_eb = config.get('export_eb', False)
+
     infer_arch = config['architecture']
     infer_arch = config['architecture']
+    if 'RCNN' in infer_arch and export_onnx:
+        logger.warning(
+            "Exporting RCNN model to ONNX only support batch_size = 1")
+        infer_cfg['export_onnx'] = True
+        infer_cfg['export_eb'] = export_eb
 
 
     if infer_arch in MOT_ARCH:
     if infer_arch in MOT_ARCH:
         if infer_arch == 'DeepSORT':
         if infer_arch == 'DeepSORT':
@@ -140,6 +153,12 @@ def _dump_infer_config(config, path, image_shape, model):
             infer_cfg['min_subgraph_size'] = min_subgraph_size
             infer_cfg['min_subgraph_size'] = min_subgraph_size
             arch_state = True
             arch_state = True
             break
             break
+
+    if infer_arch == 'YOLOX':
+        infer_cfg['arch'] = infer_arch
+        infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
+        arch_state = True
+
     if not arch_state:
     if not arch_state:
         logger.error(
         logger.error(
             'Architecture: {} is not supported for exporting model now.\n'.
             'Architecture: {} is not supported for exporting model now.\n'.
@@ -165,12 +184,17 @@ def _dump_infer_config(config, path, image_shape, model):
         reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
         reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
 
 
     if infer_arch == 'PicoDet':
     if infer_arch == 'PicoDet':
-        infer_cfg['NMS'] = config['PicoHead']['nms']
-        # In order to speed up the prediction, the threshold of nms
+        if hasattr(config, 'export') and config['export'].get(
+                'post_process',
+                False) and not config['export'].get('benchmark', False):
+            infer_cfg['arch'] = 'GFL'
+        head_name = 'PicoHeadV2' if config['PicoHeadV2'] else 'PicoHead'
+        infer_cfg['NMS'] = config[head_name]['nms']
+        # In order to speed up the prediction, the threshold of nms 
         # is adjusted here, which can be changed in infer_cfg.yml
         # is adjusted here, which can be changed in infer_cfg.yml
-        config['PicoHead']['nms']["score_threshold"] = 0.3
-        config['PicoHead']['nms']["nms_threshold"] = 0.5
-        infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']
+        config[head_name]['nms']["score_threshold"] = 0.3
+        config[head_name]['nms']["nms_threshold"] = 0.5
+        infer_cfg['fpn_stride'] = config[head_name]['fpn_stride']
 
 
     yaml.dump(infer_cfg, open(path, 'w'))
     yaml.dump(infer_cfg, open(path, 'w'))
     logger.info("Export inference config file to {}".format(os.path.join(path)))
     logger.info("Export inference config file to {}".format(os.path.join(path)))

+ 174 - 77
paddlers/models/ppdet/engine/tracker.py

@@ -17,27 +17,33 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import print_function
 
 
 import os
 import os
-import cv2
 import glob
 import glob
 import re
 import re
 import paddle
 import paddle
+import paddle.nn as nn
 import numpy as np
 import numpy as np
-import os.path as osp
+from tqdm import tqdm
 from collections import defaultdict
 from collections import defaultdict
 
 
 from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
 from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
 from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
 from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
-
-from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric
-from paddlers.models.ppdet.metrics import MCMOTMetric
+from paddlers.models.ppdet.modeling.mot.tracker import JDETracker, DeepSORTTracker, OCSORTTracker
+from paddlers.models.ppdet.modeling.architectures import YOLOX
+from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric, MCMOTMetric
+import paddlers.models.ppdet.utils.stats as stats
 
 
 from .callbacks import Callback, ComposeCallback
 from .callbacks import Callback, ComposeCallback
 
 
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
 
 
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
+MOT_ARCH_JDE = ['JDE', 'FairMOT']
+MOT_ARCH_SDE = ['DeepSORT', 'ByteTrack']
+MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti']
+
 __all__ = ['Tracker']
 __all__ = ['Tracker']
 
 
 
 
@@ -55,6 +61,12 @@ class Tracker(object):
         # build model
         # build model
         self.model = create(cfg.architecture)
         self.model = create(cfg.architecture)
 
 
+        if isinstance(self.model.detector, YOLOX):
+            for k, m in self.model.named_sublayers():
+                if isinstance(m, nn.BatchNorm2D):
+                    m._epsilon = 1e-3  # for amp(fp16)
+                    m._momentum = 0.97  # 0.03 in pytorch
+
         self.status = {}
         self.status = {}
         self.start_epoch = 0
         self.start_epoch = 0
 
 
@@ -108,11 +120,15 @@ class Tracker(object):
         load_weight(self.model, weights, self.optimizer)
         load_weight(self.model, weights, self.optimizer)
 
 
     def load_weights_sde(self, det_weights, reid_weights):
     def load_weights_sde(self, det_weights, reid_weights):
-        if self.model.detector:
+        with_detector = self.model.detector is not None
+        with_reid = self.model.reid is not None
+
+        if with_detector:
             load_weight(self.model.detector, det_weights)
             load_weight(self.model.detector, det_weights)
-            load_weight(self.model.reid, reid_weights)
+            if with_reid:
+                load_weight(self.model.reid, reid_weights)
         else:
         else:
-            load_weight(self.model.reid, reid_weights, self.optimizer)
+            load_weight(self.model.reid, reid_weights)
 
 
     def _eval_seq_jde(self,
     def _eval_seq_jde(self,
                       dataloader,
                       dataloader,
@@ -131,11 +147,8 @@ class Tracker(object):
         self.model.eval()
         self.model.eval()
         results = defaultdict(list)  # support single class and multi classes
         results = defaultdict(list)  # support single class and multi classes
 
 
-        for step_id, data in enumerate(dataloader):
+        for step_id, data in enumerate(tqdm(dataloader)):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id
-            if frame_id % 40 == 0:
-                logger.info('Processing frame {} ({:.2f} fps)'.format(
-                    frame_id, 1. / max(1e-5, timer.average_time)))
             # forward
             # forward
             timer.tic()
             timer.tic()
             pred_dets, pred_embs = self.model(data)
             pred_dets, pred_embs = self.model(data)
@@ -184,24 +197,23 @@ class Tracker(object):
         if save_dir:
         if save_dir:
             if not os.path.exists(save_dir): os.makedirs(save_dir)
             if not os.path.exists(save_dir): os.makedirs(save_dir)
         use_detector = False if not self.model.detector else True
         use_detector = False if not self.model.detector else True
+        use_reid = False if not self.model.reid else True
 
 
         timer = MOTTimer()
         timer = MOTTimer()
         results = defaultdict(list)
         results = defaultdict(list)
         frame_id = 0
         frame_id = 0
         self.status['mode'] = 'track'
         self.status['mode'] = 'track'
         self.model.eval()
         self.model.eval()
-        self.model.reid.eval()
+        if use_reid:
+            self.model.reid.eval()
         if not use_detector:
         if not use_detector:
             dets_list = load_det_results(det_file, len(dataloader))
             dets_list = load_det_results(det_file, len(dataloader))
             logger.info('Finish loading detection results file {}.'.format(
             logger.info('Finish loading detection results file {}.'.format(
                 det_file))
                 det_file))
 
 
-        for step_id, data in enumerate(dataloader):
+        tracker = self.model.tracker
+        for step_id, data in enumerate(tqdm(dataloader)):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id
-            if frame_id % 40 == 0:
-                logger.info('Processing frame {} ({:.2f} fps)'.format(
-                    frame_id, 1. / max(1e-5, timer.average_time)))
-
             ori_image = data['ori_image']  # [bs, H, W, 3]
             ori_image = data['ori_image']  # [bs, H, W, 3]
             ori_image_shape = data['ori_image'].shape[1:3]
             ori_image_shape = data['ori_image'].shape[1:3]
             # ori_image_shape: [H, W]
             # ori_image_shape: [H, W]
@@ -214,7 +226,7 @@ class Tracker(object):
             scale_factor = data['scale_factor'][0].numpy()
             scale_factor = data['scale_factor'][0].numpy()
 
 
             empty_detections = False
             empty_detections = False
-            # when it has no detected bboxes, will not inference reid model
+            # when it has no detected bboxes, will not inference reid model 
             # and if visualize, use original image instead
             # and if visualize, use original image instead
 
 
             # forward
             # forward
@@ -240,7 +252,7 @@ class Tracker(object):
                 outs['bbox'] = outs['bbox'].numpy()
                 outs['bbox'] = outs['bbox'].numpy()
                 outs['bbox_num'] = outs['bbox_num'].numpy()
                 outs['bbox_num'] = outs['bbox_num'].numpy()
 
 
-                if outs['bbox_num'] > 0 and empty_detections == False:
+                if len(outs['bbox']) > 0 and empty_detections == False:
                     # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                     # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                     pred_cls_ids = outs['bbox'][:, 0:1]
                     pred_cls_ids = outs['bbox'][:, 0:1]
                     pred_scores = outs['bbox'][:, 1:2]
                     pred_scores = outs['bbox'][:, 1:2]
@@ -249,13 +261,15 @@ class Tracker(object):
                         # with LetterBoxResize and JDEBBoxPostProcess.
                         # with LetterBoxResize and JDEBBoxPostProcess.
                         #
                         #
                         # 'scaled' means whether the coords after detector outputs
                         # 'scaled' means whether the coords after detector outputs
-                        # have been scaled back to the original image, set True
+                        # have been scaled back to the original image, set True 
                         # in general detector, set False in JDE YOLOv3.
                         # in general detector, set False in JDE YOLOv3.
                         pred_bboxes = scale_coords(outs['bbox'][:, 2:],
                         pred_bboxes = scale_coords(outs['bbox'][:, 2:],
                                                    input_shape, im_shape,
                                                    input_shape, im_shape,
                                                    scale_factor)
                                                    scale_factor)
                     else:
                     else:
                         pred_bboxes = outs['bbox'][:, 2:]
                         pred_bboxes = outs['bbox'][:, 2:]
+                    pred_dets_old = np.concatenate(
+                        (pred_cls_ids, pred_scores, pred_bboxes), axis=1)
                 else:
                 else:
                     logger.warning(
                     logger.warning(
                         'Frame {} has not detected object, try to modify score threshold.'.
                         'Frame {} has not detected object, try to modify score threshold.'.
@@ -281,52 +295,104 @@ class Tracker(object):
                 # thus will not inference reid model
                 # thus will not inference reid model
                 continue
                 continue
 
 
-            pred_scores = pred_scores[keep_idx[0]]
             pred_cls_ids = pred_cls_ids[keep_idx[0]]
             pred_cls_ids = pred_cls_ids[keep_idx[0]]
-            pred_tlwhs = np.concatenate(
-                (pred_xyxys[:, 0:2],
-                 pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
-                axis=1)
+            pred_scores = pred_scores[keep_idx[0]]
             pred_dets = np.concatenate(
             pred_dets = np.concatenate(
-                (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
-
-            tracker = self.model.tracker
-            crops = get_crops(
-                pred_xyxys,
-                ori_image,
-                w=tracker.input_size[0],
-                h=tracker.input_size[1])
-            crops = paddle.to_tensor(crops)
-
-            data.update({'crops': crops})
-            pred_embs = self.model(data).numpy()
-
-            tracker.predict()
-            online_targets = tracker.update(pred_dets, pred_embs)
-
-            online_tlwhs, online_scores, online_ids = [], [], []
-            for t in online_targets:
-                if not t.is_confirmed() or t.time_since_update > 1:
-                    continue
-                tlwh = t.to_tlwh()
-                tscore = t.score
-                tid = t.track_id
-                if tscore < draw_threshold: continue
-                if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
-                if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
-                        3] > tracker.vertical_ratio:
-                    continue
-                online_tlwhs.append(tlwh)
-                online_scores.append(tscore)
-                online_ids.append(tid)
-            timer.toc()
+                (pred_cls_ids, pred_scores, pred_xyxys), axis=1)
+
+            if use_reid:
+                crops = get_crops(
+                    pred_xyxys,
+                    ori_image,
+                    w=tracker.input_size[0],
+                    h=tracker.input_size[1])
+                crops = paddle.to_tensor(crops)
+
+                data.update({'crops': crops})
+                pred_embs = self.model(data)['embeddings'].numpy()
+            else:
+                pred_embs = None
 
 
-            # save results
-            results[0].append(
-                (frame_id + 1, online_tlwhs, online_scores, online_ids))
-            save_vis_results(data, frame_id, online_ids, online_tlwhs,
-                             online_scores, timer.average_time, show_image,
-                             save_dir, self.cfg.num_classes)
+            if isinstance(tracker, DeepSORTTracker):
+                online_tlwhs, online_scores, online_ids = [], [], []
+                tracker.predict()
+                online_targets = tracker.update(pred_dets, pred_embs)
+                for t in online_targets:
+                    if not t.is_confirmed() or t.time_since_update > 1:
+                        continue
+                    tlwh = t.to_tlwh()
+                    tscore = t.score
+                    tid = t.track_id
+                    if tscore < draw_threshold: continue
+                    if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
+                    if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
+                            3] > tracker.vertical_ratio:
+                        continue
+                    online_tlwhs.append(tlwh)
+                    online_scores.append(tscore)
+                    online_ids.append(tid)
+                timer.toc()
+
+                # save results
+                results[0].append(
+                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
+                                 online_scores, timer.average_time, show_image,
+                                 save_dir, self.cfg.num_classes)
+
+            elif isinstance(tracker, JDETracker):
+                # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
+                tracker.track_buffer, tracker.conf_thres = get_trick_hyperparams(
+                    seq_name, tracker.track_buffer, tracker.conf_thres)
+
+                online_targets_dict = tracker.update(pred_dets_old, pred_embs)
+                online_tlwhs = defaultdict(list)
+                online_scores = defaultdict(list)
+                online_ids = defaultdict(list)
+                for cls_id in range(self.cfg.num_classes):
+                    online_targets = online_targets_dict[cls_id]
+                    for t in online_targets:
+                        tlwh = t.tlwh
+                        tid = t.track_id
+                        tscore = t.score
+                        if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
+                        if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
+                                3] > tracker.vertical_ratio:
+                            continue
+                        online_tlwhs[cls_id].append(tlwh)
+                        online_ids[cls_id].append(tid)
+                        online_scores[cls_id].append(tscore)
+                    # save results
+                    results[cls_id].append(
+                        (frame_id + 1, online_tlwhs[cls_id],
+                         online_scores[cls_id], online_ids[cls_id]))
+                timer.toc()
+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
+                                 online_scores, timer.average_time, show_image,
+                                 save_dir, self.cfg.num_classes)
+            elif isinstance(tracker, OCSORTTracker):
+                # OC_SORT Tracker
+                online_targets = tracker.update(pred_dets_old, pred_embs)
+                online_tlwhs = []
+                online_ids = []
+                online_scores = []
+                for t in online_targets:
+                    tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]]
+                    tscore = float(t[4])
+                    tid = int(t[5])
+                    if tlwh[2] * tlwh[3] > 0:
+                        online_tlwhs.append(tlwh)
+                        online_ids.append(tid)
+                        online_scores.append(tscore)
+                timer.toc()
+                # save results
+                results[0].append(
+                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
+                                 online_scores, timer.average_time, show_image,
+                                 save_dir, self.cfg.num_classes)
+            else:
+                raise ValueError(tracker)
             frame_id += 1
             frame_id += 1
 
 
         return results, frame_id, timer.average_time, timer.calls
         return results, frame_id, timer.average_time, timer.calls
@@ -345,10 +411,10 @@ class Tracker(object):
         if not os.path.exists(output_dir): os.makedirs(output_dir)
         if not os.path.exists(output_dir): os.makedirs(output_dir)
         result_root = os.path.join(output_dir, 'mot_results')
         result_root = os.path.join(output_dir, 'mot_results')
         if not os.path.exists(result_root): os.makedirs(result_root)
         if not os.path.exists(result_root): os.makedirs(result_root)
-        assert data_type in ['mot', 'mcmot', 'kitti'], \
+        assert data_type in MOT_DATA_TYPE, \
             "data_type should be 'mot', 'mcmot' or 'kitti'"
             "data_type should be 'mot', 'mcmot' or 'kitti'"
-        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
-            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
+        assert model_type in MOT_ARCH, \
+            "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
 
 
         # run tracking
         # run tracking
         n_frame = 0
         n_frame = 0
@@ -371,7 +437,7 @@ class Tracker(object):
 
 
             save_dir = os.path.join(output_dir, 'mot_outputs',
             save_dir = os.path.join(output_dir, 'mot_outputs',
                                     seq) if save_images or save_videos else None
                                     seq) if save_images or save_videos else None
-            logger.info('start seq: {}'.format(seq))
+            logger.info('Evaluate seq: {}'.format(seq))
 
 
             self.dataset.set_images(self.get_infer_images(infer_dir))
             self.dataset.set_images(self.get_infer_images(infer_dir))
             dataloader = create('EvalMOTReader')(self.dataset, 0)
             dataloader = create('EvalMOTReader')(self.dataset, 0)
@@ -379,13 +445,13 @@ class Tracker(object):
             result_filename = os.path.join(result_root, '{}.txt'.format(seq))
             result_filename = os.path.join(result_root, '{}.txt'.format(seq))
 
 
             with paddle.no_grad():
             with paddle.no_grad():
-                if model_type in ['JDE', 'FairMOT']:
+                if model_type in MOT_ARCH_JDE:
                     results, nf, ta, tc = self._eval_seq_jde(
                     results, nf, ta, tc = self._eval_seq_jde(
                         dataloader,
                         dataloader,
                         save_dir=save_dir,
                         save_dir=save_dir,
                         show_image=show_image,
                         show_image=show_image,
                         frame_rate=frame_rate)
                         frame_rate=frame_rate)
-                elif model_type in ['DeepSORT']:
+                elif model_type in MOT_ARCH_SDE:
                     results, nf, ta, tc = self._eval_seq_sde(
                     results, nf, ta, tc = self._eval_seq_sde(
                         dataloader,
                         dataloader,
                         save_dir=save_dir,
                         save_dir=save_dir,
@@ -412,7 +478,6 @@ class Tracker(object):
                 os.system(cmd_str)
                 os.system(cmd_str)
                 logger.info('Save video in {}.'.format(output_video_path))
                 logger.info('Save video in {}.'.format(output_video_path))
 
 
-            logger.info('Evaluate seq: {}'.format(seq))
             # update metrics
             # update metrics
             for metric in self._metrics:
             for metric in self._metrics:
                 metric.update(data_root, seq, data_type, result_root,
                 metric.update(data_root, seq, data_type, result_root,
@@ -471,12 +536,12 @@ class Tracker(object):
         if not os.path.exists(output_dir): os.makedirs(output_dir)
         if not os.path.exists(output_dir): os.makedirs(output_dir)
         result_root = os.path.join(output_dir, 'mot_results')
         result_root = os.path.join(output_dir, 'mot_results')
         if not os.path.exists(result_root): os.makedirs(result_root)
         if not os.path.exists(result_root): os.makedirs(result_root)
-        assert data_type in ['mot', 'mcmot', 'kitti'], \
+        assert data_type in MOT_DATA_TYPE, \
             "data_type should be 'mot', 'mcmot' or 'kitti'"
             "data_type should be 'mot', 'mcmot' or 'kitti'"
-        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
-            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
+        assert model_type in MOT_ARCH, \
+            "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
 
 
-        # run tracking
+        # run tracking        
         if video_file:
         if video_file:
             seq = video_file.split('/')[-1].split('.')[0]
             seq = video_file.split('/')[-1].split('.')[0]
             self.dataset.set_video(video_file, frame_rate)
             self.dataset.set_video(video_file, frame_rate)
@@ -504,14 +569,14 @@ class Tracker(object):
             frame_rate = self.dataset.frame_rate
             frame_rate = self.dataset.frame_rate
 
 
         with paddle.no_grad():
         with paddle.no_grad():
-            if model_type in ['JDE', 'FairMOT']:
+            if model_type in MOT_ARCH_JDE:
                 results, nf, ta, tc = self._eval_seq_jde(
                 results, nf, ta, tc = self._eval_seq_jde(
                     dataloader,
                     dataloader,
                     save_dir=save_dir,
                     save_dir=save_dir,
                     show_image=show_image,
                     show_image=show_image,
                     frame_rate=frame_rate,
                     frame_rate=frame_rate,
                     draw_threshold=draw_threshold)
                     draw_threshold=draw_threshold)
-            elif model_type in ['DeepSORT']:
+            elif model_type in MOT_ARCH_SDE:
                 results, nf, ta, tc = self._eval_seq_sde(
                 results, nf, ta, tc = self._eval_seq_sde(
                     dataloader,
                     dataloader,
                     save_dir=save_dir,
                     save_dir=save_dir,
@@ -535,3 +600,35 @@ class Tracker(object):
 
 
         write_mot_results(result_filename, results, data_type,
         write_mot_results(result_filename, results, data_type,
                           self.cfg.num_classes)
                           self.cfg.num_classes)
+
+
+def get_trick_hyperparams(video_name, ori_buffer, ori_thresh):
+    """Return per-sequence tracker hyperparameters for MOTChallenge videos.
+
+    Sequences whose name does not start with 'MOT' are returned unchanged.
+    For MOT17 / MOT20 sequences, the first 8 characters of the name
+    (e.g. 'MOT17-05') select hand-tuned overrides; sequences without an
+    override keep the original values.
+
+    Args:
+        video_name (str): sequence name, e.g. 'MOT17-05-FRCNN'.
+        ori_buffer (int): track buffer currently configured on the tracker.
+        ori_thresh (float): confidence threshold currently configured on
+            the tracker.
+
+    Returns:
+        tuple: (track_buffer, track_thresh) to use for this sequence.
+    """
+    if video_name[:3] != 'MOT':
+        # only used for MOTChallenge (MOT17, MOT20) Test-set
+        return ori_buffer, ori_thresh
+
+    # Only the 'MOTxx-yy' prefix identifies the sequence; the detector
+    # suffix (e.g. '-FRCNN') is ignored.
+    seq_key = video_name[:8]
+
+    buffer_overrides = {
+        'MOT17-05': 14,
+        'MOT17-13': 25,
+    }
+    # MOT17 and MOT20 entries live in one table so that neither group can
+    # overwrite the other's result.
+    thresh_overrides = {
+        'MOT17-01': 0.65,
+        'MOT17-06': 0.65,
+        'MOT17-12': 0.7,
+        'MOT17-14': 0.67,
+        'MOT20-06': 0.3,
+        'MOT20-08': 0.3,
+    }
+
+    track_buffer = buffer_overrides.get(seq_key, ori_buffer)
+    track_thresh = thresh_overrides.get(seq_key, ori_thresh)
+
+    # Return the selected threshold, not the untouched input value.
+    return track_buffer, track_thresh

+ 533 - 128
paddlers/models/ppdet/engine/trainer.py

@@ -20,38 +20,44 @@ import os
 import sys
 import sys
 import copy
 import copy
 import time
 import time
+from tqdm import tqdm
 
 
 import numpy as np
 import numpy as np
 import typing
 import typing
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageFile
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True
 
 
 import paddle
 import paddle
+import paddle.nn as nn
 import paddle.distributed as dist
 import paddle.distributed as dist
 from paddle.distributed import fleet
 from paddle.distributed import fleet
-from paddle import amp
 from paddle.static import InputSpec
 from paddle.static import InputSpec
 from paddlers.models.ppdet.optimizer import ModelEMA
 from paddlers.models.ppdet.optimizer import ModelEMA
 
 
 from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.core.workspace import create
-from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
 from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
 from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
 from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
 from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
 from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
 from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
 from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
 from paddlers.models.ppdet.data.source.category import get_categories
 from paddlers.models.ppdet.data.source.category import get_categories
-from paddlers.models.ppdet.utils import stats
+import paddlers.models.ppdet.utils.stats as stats
+from paddlers.models.ppdet.utils.fuse_utils import fuse_conv_bn
 from paddlers.models.ppdet.utils import profiler
 from paddlers.models.ppdet.utils import profiler
+from paddlers.models.ppdet.modeling.post_process import multiclass_nms
 
 
-from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator
+from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator, WandbCallback
 from .export_utils import _dump_infer_config, _prune_input_spec
 from .export_utils import _dump_infer_config, _prune_input_spec
 
 
+from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
+
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger('ppdet.engine')
 logger = setup_logger('ppdet.engine')
 
 
 __all__ = ['Trainer']
 __all__ = ['Trainer']
 
 
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
 
 
 
 
 class Trainer(object):
 class Trainer(object):
@@ -62,19 +68,30 @@ class Trainer(object):
         self.mode = mode.lower()
         self.mode = mode.lower()
         self.optimizer = None
         self.optimizer = None
         self.is_loaded_weights = False
         self.is_loaded_weights = False
+        self.use_amp = self.cfg.get('amp', False)
+        self.amp_level = self.cfg.get('amp_level', 'O1')
+        self.custom_white_list = self.cfg.get('custom_white_list', None)
+        self.custom_black_list = self.cfg.get('custom_black_list', None)
 
 
         # build data loader
         # build data loader
+        capital_mode = self.mode.capitalize()
         if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
         if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
-            self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
+            self.dataset = self.cfg['{}MOTDataset'.format(
+                capital_mode)] = create('{}MOTDataset'.format(capital_mode))()
         else:
         else:
-            self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
+            self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create(
+                '{}Dataset'.format(capital_mode))()
 
 
         if cfg.architecture == 'DeepSORT' and self.mode == 'train':
         if cfg.architecture == 'DeepSORT' and self.mode == 'train':
             logger.error('DeepSORT has no need of training on mot dataset.')
             logger.error('DeepSORT has no need of training on mot dataset.')
             sys.exit(1)
             sys.exit(1)
 
 
+        if cfg.architecture == 'FairMOT' and self.mode == 'eval':
+            images = self.parse_mot_images(cfg)
+            self.dataset.set_images(images)
+
         if self.mode == 'train':
         if self.mode == 'train':
-            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
+            self.loader = create('{}Reader'.format(capital_mode))(
                 self.dataset, cfg.worker_num)
                 self.dataset, cfg.worker_num)
 
 
         if cfg.architecture == 'JDE' and self.mode == 'train':
         if cfg.architecture == 'JDE' and self.mode == 'train':
@@ -94,41 +111,73 @@ class Trainer(object):
             self.model = self.cfg.model
             self.model = self.cfg.model
             self.is_loaded_weights = True
             self.is_loaded_weights = True
 
 
-        #normalize params for deploy
-        self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
+        if cfg.architecture == 'YOLOX':
+            for k, m in self.model.named_sublayers():
+                if isinstance(m, nn.BatchNorm2D):
+                    m._epsilon = 1e-3  # for amp(fp16)
+                    m._momentum = 0.97  # 0.03 in pytorch
 
 
-        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
-        if self.use_ema:
-            ema_decay = self.cfg.get('ema_decay', 0.9998)
-            cycle_epoch = self.cfg.get('cycle_epoch', -1)
-            self.ema = ModelEMA(
-                self.model,
-                decay=ema_decay,
-                use_thres_step=True,
-                cycle_epoch=cycle_epoch)
+        #normalize params for deploy
+        if 'slim' in cfg and cfg['slim_type'] == 'OFA':
+            self.model.model.load_meanstd(cfg['TestReader'][
+                'sample_transforms'])
+        elif 'slim' in cfg and cfg['slim_type'] == 'Distill':
+            self.model.student_model.load_meanstd(cfg['TestReader'][
+                'sample_transforms'])
+        elif 'slim' in cfg and cfg[
+                'slim_type'] == 'DistillPrune' and self.mode == 'train':
+            self.model.student_model.load_meanstd(cfg['TestReader'][
+                'sample_transforms'])
+        else:
+            self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
 
 
         # EvalDataset build with BatchSampler to evaluate in single device
         # EvalDataset build with BatchSampler to evaluate in single device
         # TODO: multi-device evaluate
         # TODO: multi-device evaluate
         if self.mode == 'eval':
         if self.mode == 'eval':
-            self._eval_batch_sampler = paddle.io.BatchSampler(
-                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
-            reader_name = '{}Reader'.format(self.mode.capitalize())
-            # If metric is VOC, need to be set collate_batch=False.
-            if cfg.metric == 'VOC':
-                cfg[reader_name]['collate_batch'] = False
-            self.loader = create(reader_name)(self.dataset, cfg.worker_num,
-                                              self._eval_batch_sampler)
+            if cfg.architecture == 'FairMOT':
+                self.loader = create('EvalMOTReader')(self.dataset, 0)
+            else:
+                self._eval_batch_sampler = paddle.io.BatchSampler(
+                    self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
+                reader_name = '{}Reader'.format(self.mode.capitalize())
+                # If metric is VOC, need to be set collate_batch=False.
+                if cfg.metric == 'VOC':
+                    cfg[reader_name]['collate_batch'] = False
+                self.loader = create(reader_name)(self.dataset, cfg.worker_num,
+                                                  self._eval_batch_sampler)
         # TestDataset build after user set images, skip loader creation here
         # TestDataset build after user set images, skip loader creation here
 
 
         # build optimizer in train mode
         # build optimizer in train mode
         if self.mode == 'train':
         if self.mode == 'train':
             steps_per_epoch = len(self.loader)
             steps_per_epoch = len(self.loader)
+            if steps_per_epoch < 1:
+                logger.warning(
+                    "Samples in dataset are less than batch_size, please set smaller batch_size in TrainReader."
+                )
             self.lr = create('LearningRate')(steps_per_epoch)
             self.lr = create('LearningRate')(steps_per_epoch)
             self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
             self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
 
 
-        if self.cfg.get('unstructured_prune'):
-            self.pruner = create('UnstructuredPruner')(self.model,
-                                                       steps_per_epoch)
+            # Unstructured pruner is only enabled in the train mode.
+            if self.cfg.get('unstructured_prune'):
+                self.pruner = create('UnstructuredPruner')(self.model,
+                                                           steps_per_epoch)
+        if self.use_amp and self.amp_level == 'O2':
+            self.model, self.optimizer = paddle.amp.decorate(
+                models=self.model,
+                optimizers=self.optimizer,
+                level=self.amp_level)
+        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
+        if self.use_ema:
+            ema_decay = self.cfg.get('ema_decay', 0.9998)
+            ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
+            cycle_epoch = self.cfg.get('cycle_epoch', -1)
+            ema_black_list = self.cfg.get('ema_black_list', None)
+            self.ema = ModelEMA(
+                self.model,
+                decay=ema_decay,
+                ema_decay_type=ema_decay_type,
+                cycle_epoch=cycle_epoch,
+                ema_black_list=ema_black_list)
 
 
         self._nranks = dist.get_world_size()
         self._nranks = dist.get_world_size()
         self._local_rank = dist.get_rank()
         self._local_rank = dist.get_rank()
@@ -152,6 +201,8 @@ class Trainer(object):
                 self._callbacks.append(VisualDLWriter(self))
                 self._callbacks.append(VisualDLWriter(self))
             if self.cfg.get('save_proposals', False):
             if self.cfg.get('save_proposals', False):
                 self._callbacks.append(SniperProposalsGenerator(self))
                 self._callbacks.append(SniperProposalsGenerator(self))
+            if self.cfg.get('use_wandb', False) or 'wandb' in self.cfg:
+                self._callbacks.append(WandbCallback(self))
             self._compose_callback = ComposeCallback(self._callbacks)
             self._compose_callback = ComposeCallback(self._callbacks)
         elif self.mode == 'eval':
         elif self.mode == 'eval':
             self._callbacks = [LogPrinter(self)]
             self._callbacks = [LogPrinter(self)]
@@ -172,7 +223,7 @@ class Trainer(object):
         classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
         classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
         if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
         if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
             # TODO: bias should be unified
             # TODO: bias should be unified
-            bias = self.cfg['bias'] if 'bias' in self.cfg else 0
+            bias = 1 if self.cfg.get('bias', False) else 0
             output_eval = self.cfg['output_eval'] \
             output_eval = self.cfg['output_eval'] \
                 if 'output_eval' in self.cfg else None
                 if 'output_eval' in self.cfg else None
             save_prediction_only = self.cfg.get('save_prediction_only', False)
             save_prediction_only = self.cfg.get('save_prediction_only', False)
@@ -184,13 +235,14 @@ class Trainer(object):
 
 
             # when validating during training, the annotation file should be taken from
             # when validating during training, the annotation file should be taken from
             # EvalReader instead of self.dataset (which is TrainReader)
             # EvalReader instead of self.dataset (which is TrainReader)
-            anno_file = self.dataset.get_anno()
-            dataset = self.dataset
             if self.mode == 'train' and validate:
             if self.mode == 'train' and validate:
                 eval_dataset = self.cfg['EvalDataset']
                 eval_dataset = self.cfg['EvalDataset']
                 eval_dataset.check_or_download_dataset()
                 eval_dataset.check_or_download_dataset()
                 anno_file = eval_dataset.get_anno()
                 anno_file = eval_dataset.get_anno()
                 dataset = eval_dataset
                 dataset = eval_dataset
+            else:
+                dataset = self.dataset
+                anno_file = dataset.get_anno()
 
 
             IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
             IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
             if self.cfg.metric == "COCO":
             if self.cfg.metric == "COCO":
@@ -222,11 +274,7 @@ class Trainer(object):
             output_eval = self.cfg['output_eval'] \
             output_eval = self.cfg['output_eval'] \
                 if 'output_eval' in self.cfg else None
                 if 'output_eval' in self.cfg else None
             save_prediction_only = self.cfg.get('save_prediction_only', False)
             save_prediction_only = self.cfg.get('save_prediction_only', False)
-
-            # pass clsid2catid info to metric instance to avoid multiple loading
-            # annotation file
-            clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
-                                if self.mode == 'eval' else None
+            imid2path = self.cfg.get('imid2path', None)
 
 
             # when validating during training, the annotation file should be taken from
             # when validating during training, the annotation file should be taken from
             # EvalReader instead of self.dataset (which is TrainReader)
             # EvalReader instead of self.dataset (which is TrainReader)
@@ -239,19 +287,25 @@ class Trainer(object):
             self._metrics = [
             self._metrics = [
                 RBoxMetric(
                 RBoxMetric(
                     anno_file=anno_file,
                     anno_file=anno_file,
-                    clsid2catid=clsid2catid,
                     classwise=classwise,
                     classwise=classwise,
                     output_eval=output_eval,
                     output_eval=output_eval,
                     bias=bias,
                     bias=bias,
-                    save_prediction_only=save_prediction_only)
+                    save_prediction_only=save_prediction_only,
+                    imid2path=imid2path)
             ]
             ]
         elif self.cfg.metric == 'VOC':
         elif self.cfg.metric == 'VOC':
+            output_eval = self.cfg['output_eval'] \
+                if 'output_eval' in self.cfg else None
+            save_prediction_only = self.cfg.get('save_prediction_only', False)
+
             self._metrics = [
             self._metrics = [
                 VOCMetric(
                 VOCMetric(
                     label_list=self.dataset.get_label_list(),
                     label_list=self.dataset.get_label_list(),
                     class_num=self.cfg.num_classes,
                     class_num=self.cfg.num_classes,
                     map_type=self.cfg.map_type,
                     map_type=self.cfg.map_type,
-                    classwise=classwise)
+                    classwise=classwise,
+                    output_eval=output_eval,
+                    save_prediction_only=save_prediction_only)
             ]
             ]
         elif self.cfg.metric == 'WiderFace':
         elif self.cfg.metric == 'WiderFace':
             multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
             multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
@@ -334,19 +388,29 @@ class Trainer(object):
             self.start_epoch = load_weight(self.model.student_model, weights,
             self.start_epoch = load_weight(self.model.student_model, weights,
                                            self.optimizer)
                                            self.optimizer)
         else:
         else:
-            self.start_epoch = load_weight(self.model, weights, self.optimizer)
+            self.start_epoch = load_weight(self.model, weights, self.optimizer,
+                                           self.ema if self.use_ema else None)
         logger.debug("Resume weights of epoch {}".format(self.start_epoch))
         logger.debug("Resume weights of epoch {}".format(self.start_epoch))
 
 
     def train(self, validate=False):
     def train(self, validate=False):
         assert self.mode == 'train', "Model not in 'train' mode"
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False
         Init_mark = False
+        if validate:
+            self.cfg['EvalDataset'] = self.cfg.EvalDataset = create(
+                "EvalDataset")()
 
 
-        sync_bn = (getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
+        model = self.model
+        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
                    self.cfg.use_gpu and self._nranks > 1)
                    self.cfg.use_gpu and self._nranks > 1)
         if sync_bn:
         if sync_bn:
-            self.model = BaseArch.convert_sync_batchnorm(self.model)
-
-        model = self.model
+            model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+
+        # enable auto mixed precision mode
+        if self.use_amp:
+            scaler = paddle.amp.GradScaler(
+                enable=self.cfg.use_gpu or self.cfg.use_npu,
+                init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
+        # get distributed model
         if self.cfg.get('fleet', False):
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)
             model = fleet.distributed_model(model)
             self.optimizer = fleet.distributed_optimizer(self.optimizer)
             self.optimizer = fleet.distributed_optimizer(self.optimizer)
@@ -354,12 +418,7 @@ class Trainer(object):
             find_unused_parameters = self.cfg[
             find_unused_parameters = self.cfg[
                 'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
                 'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
             model = paddle.DataParallel(
             model = paddle.DataParallel(
-                self.model, find_unused_parameters=find_unused_parameters)
-
-        # initial fp16
-        if self.cfg.get('fp16', False):
-            scaler = amp.GradScaler(
-                enable=self.cfg.use_gpu, init_loss_scaling=1024)
+                model, find_unused_parameters=find_unused_parameters)
 
 
         self.status.update({
         self.status.update({
             'epoch_id': self.start_epoch,
             'epoch_id': self.start_epoch,
@@ -381,6 +440,9 @@ class Trainer(object):
 
 
         self._compose_callback.on_train_begin(self.status)
         self._compose_callback.on_train_begin(self.status)
 
 
+        use_fused_allreduce_gradients = self.cfg[
+            'use_fused_allreduce_gradients'] if 'use_fused_allreduce_gradients' in self.cfg else False
+
         for epoch_id in range(self.start_epoch, self.cfg.epoch):
         for epoch_id in range(self.start_epoch, self.cfg.epoch):
             self.status['mode'] = 'train'
             self.status['mode'] = 'train'
             self.status['epoch_id'] = epoch_id
             self.status['epoch_id'] = epoch_id
@@ -395,23 +457,56 @@ class Trainer(object):
                 self._compose_callback.on_step_begin(self.status)
                 self._compose_callback.on_step_begin(self.status)
                 data['epoch_id'] = epoch_id
                 data['epoch_id'] = epoch_id
 
 
-                if self.cfg.get('fp16', False):
-                    with amp.auto_cast(enable=self.cfg.use_gpu):
-                        # model forward
-                        outputs = model(data)
-                        loss = outputs['loss']
-
-                    # model backward
-                    scaled_loss = scaler.scale(loss)
-                    scaled_loss.backward()
+                if self.use_amp:
+                    if isinstance(
+                            model, paddle.
+                            DataParallel) and use_fused_allreduce_gradients:
+                        with model.no_sync():
+                            with paddle.amp.auto_cast(
+                                    enable=self.cfg.use_gpu,
+                                    custom_white_list=self.custom_white_list,
+                                    custom_black_list=self.custom_black_list,
+                                    level=self.amp_level):
+                                # model forward
+                                outputs = model(data)
+                                loss = outputs['loss']
+                            # model backward
+                            scaled_loss = scaler.scale(loss)
+                            scaled_loss.backward()
+                        fused_allreduce_gradients(
+                            list(model.parameters()), None)
+                    else:
+                        with paddle.amp.auto_cast(
+                                enable=self.cfg.use_gpu,
+                                custom_white_list=self.custom_white_list,
+                                custom_black_list=self.custom_black_list,
+                                level=self.amp_level):
+                            # model forward
+                            outputs = model(data)
+                            loss = outputs['loss']
+                        # model backward
+                        scaled_loss = scaler.scale(loss)
+                        scaled_loss.backward()
                     # in dygraph mode, optimizer.minimize is equal to optimizer.step
                     # in dygraph mode, optimizer.minimize is equal to optimizer.step
                     scaler.minimize(self.optimizer, scaled_loss)
                     scaler.minimize(self.optimizer, scaled_loss)
                 else:
                 else:
-                    # model forward
-                    outputs = model(data)
-                    loss = outputs['loss']
-                    # model backward
-                    loss.backward()
+                    if isinstance(
+                            model, paddle.
+                            DataParallel) and use_fused_allreduce_gradients:
+                        with model.no_sync():
+                            # model forward
+                            outputs = model(data)
+                            loss = outputs['loss']
+                            # model backward
+                            loss.backward()
+                        fused_allreduce_gradients(
+                            list(model.parameters()), None)
+                    else:
+                        # model forward
+                        outputs = model(data)
+                        loss = outputs['loss']
+                        # model backward
+                        loss.backward()
                     self.optimizer.step()
                     self.optimizer.step()
                 curr_lr = self.optimizer.get_lr()
                 curr_lr = self.optimizer.get_lr()
                 self.lr.step()
                 self.lr.step()
@@ -426,21 +521,23 @@ class Trainer(object):
                 self.status['batch_time'].update(time.time() - iter_tic)
                 self.status['batch_time'].update(time.time() - iter_tic)
                 self._compose_callback.on_step_end(self.status)
                 self._compose_callback.on_step_end(self.status)
                 if self.use_ema:
                 if self.use_ema:
-                    self.ema.update(self.model)
+                    self.ema.update()
                 iter_tic = time.time()
                 iter_tic = time.time()
 
 
-            # apply ema weight on model
-            if self.use_ema:
-                weight = copy.deepcopy(self.model.state_dict())
-                self.model.set_dict(self.ema.apply())
             if self.cfg.get('unstructured_prune'):
             if self.cfg.get('unstructured_prune'):
                 self.pruner.update_params()
                 self.pruner.update_params()
 
 
+            is_snapshot = (self._nranks < 2 or self._local_rank == 0) \
+                       and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
+            if is_snapshot and self.use_ema:
+                # apply ema weight on model
+                weight = copy.deepcopy(self.model.state_dict())
+                self.model.set_dict(self.ema.apply())
+                self.status['weight'] = weight
+
             self._compose_callback.on_epoch_end(self.status)
             self._compose_callback.on_epoch_end(self.status)
 
 
-            if validate and (self._nranks < 2 or self._local_rank == 0) \
-                    and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
-                             or epoch_id == self.end_epoch - 1):
+            if validate and is_snapshot:
                 if not hasattr(self, '_eval_loader'):
                 if not hasattr(self, '_eval_loader'):
                     # build evaluation dataset and loader
                     # build evaluation dataset and loader
                     self._eval_dataset = self.cfg.EvalDataset
                     self._eval_dataset = self.cfg.EvalDataset
@@ -461,13 +558,15 @@ class Trainer(object):
                     Init_mark = True
                     Init_mark = True
                     self._init_metrics(validate=validate)
                     self._init_metrics(validate=validate)
                     self._reset_metrics()
                     self._reset_metrics()
+
                 with paddle.no_grad():
                 with paddle.no_grad():
                     self.status['save_best_model'] = True
                     self.status['save_best_model'] = True
                     self._eval_with_loader(self._eval_loader)
                     self._eval_with_loader(self._eval_loader)
 
 
-            # restore origin weight on model
-            if self.use_ema:
+            if is_snapshot and self.use_ema:
+                # reset original weight
                 self.model.set_dict(weight)
                 self.model.set_dict(weight)
+                self.status.pop('weight')
 
 
         self._compose_callback.on_train_end(self.status)
         self._compose_callback.on_train_end(self.status)
 
 
@@ -485,7 +584,15 @@ class Trainer(object):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id
             self._compose_callback.on_step_begin(self.status)
             self._compose_callback.on_step_begin(self.status)
             # forward
             # forward
-            outs = self.model(data)
+            if self.use_amp:
+                with paddle.amp.auto_cast(
+                        enable=self.cfg.use_gpu,
+                        custom_white_list=self.custom_white_list,
+                        custom_black_list=self.custom_black_list,
+                        level=self.amp_level):
+                    outs = self.model(data)
+            else:
+                outs = self.model(data)
 
 
             # update metrics
             # update metrics
             for metric in self._metrics:
             for metric in self._metrics:
@@ -513,32 +620,267 @@ class Trainer(object):
         with paddle.no_grad():
         with paddle.no_grad():
             self._eval_with_loader(self.loader)
             self._eval_with_loader(self.loader)
 
 
+    def _eval_with_loader_slice(self,
+                                loader,
+                                slice_size=[640, 640],
+                                overlap_ratio=[0.25, 0.25],
+                                combine_method='nms',
+                                match_threshold=0.6,
+                                match_metric='iou'):
+        sample_num = 0
+        tic = time.time()
+        self._compose_callback.on_epoch_begin(self.status)
+        self.status['mode'] = 'eval'
+        self.model.eval()
+        if self.cfg.get('print_flops', False):
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
+            self._flops(flops_loader)
+
+        merged_bboxs = []
+        for step_id, data in enumerate(loader):
+            self.status['step_id'] = step_id
+            self._compose_callback.on_step_begin(self.status)
+            # forward
+            if self.use_amp:
+                with paddle.amp.auto_cast(
+                        enable=self.cfg.use_gpu,
+                        custom_white_list=self.custom_white_list,
+                        custom_black_list=self.custom_black_list,
+                        level=self.amp_level):
+                    outs = self.model(data)
+            else:
+                outs = self.model(data)
+
+            shift_amount = data['st_pix']
+            outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount
+            outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount
+            merged_bboxs.append(outs['bbox'])
+
+            if data['is_last'] > 0:
+                # merge matching predictions
+                merged_results = {'bbox': []}
+                if combine_method == 'nms':
+                    final_boxes = multiclass_nms(
+                        np.concatenate(merged_bboxs), self.cfg.num_classes,
+                        match_threshold, match_metric)
+                    merged_results['bbox'] = np.concatenate(final_boxes)
+                elif combine_method == 'concat':
+                    merged_results['bbox'] = np.concatenate(merged_bboxs)
+                else:
+                    raise ValueError(
+                        "Now only support 'nms' or 'concat' to fuse detection results."
+                    )
+                merged_results['im_id'] = np.array([[0]])
+                merged_results['bbox_num'] = np.array(
+                    [len(merged_results['bbox'])])
+
+                merged_bboxs = []
+                data['im_id'] = data['ori_im_id']
+                # update metrics
+                for metric in self._metrics:
+                    metric.update(data, merged_results)
+
+                # multi-scale inputs: all inputs have same im_id
+                if isinstance(data, typing.Sequence):
+                    sample_num += data[0]['im_id'].numpy().shape[0]
+                else:
+                    sample_num += data['im_id'].numpy().shape[0]
+
+            self._compose_callback.on_step_end(self.status)
+
+        self.status['sample_num'] = sample_num
+        self.status['cost_time'] = time.time() - tic
+
+        # accumulate metric to log out
+        for metric in self._metrics:
+            metric.accumulate()
+            metric.log()
+        self._compose_callback.on_epoch_end(self.status)
+        # reset metric states, since metrics may be computed multiple times
+        self._reset_metrics()
+
+    def evaluate_slice(self,
+                       slice_size=[640, 640],
+                       overlap_ratio=[0.25, 0.25],
+                       combine_method='nms',
+                       match_threshold=0.6,
+                       match_metric='iou'):
+        with paddle.no_grad():
+            self._eval_with_loader_slice(self.loader, slice_size, overlap_ratio,
+                                         combine_method, match_threshold,
+                                         match_metric)
+
+    def slice_predict(self,
+                      images,
+                      slice_size=[640, 640],
+                      overlap_ratio=[0.25, 0.25],
+                      combine_method='nms',
+                      match_threshold=0.6,
+                      match_metric='iou',
+                      draw_threshold=0.5,
+                      output_dir='output',
+                      save_results=False,
+                      visualize=True):
+        self.dataset.set_slice_images(images, slice_size, overlap_ratio)
+        loader = create('TestReader')(self.dataset, 0)
+
+        imid2path = self.dataset.get_imid2path()
+
+        anno_file = self.dataset.get_anno()
+        clsid2catid, catid2name = get_categories(
+            self.cfg.metric, anno_file=anno_file)
+
+        # Run Infer 
+        self.status['mode'] = 'test'
+        self.model.eval()
+        if self.cfg.get('print_flops', False):
+            flops_loader = create('TestReader')(self.dataset, 0)
+            self._flops(flops_loader)
+
+        results = []  # all images
+        merged_bboxs = []  # single image
+        for step_id, data in enumerate(tqdm(loader)):
+            self.status['step_id'] = step_id
+            # forward
+            outs = self.model(data)
+
+            outs['bbox'] = outs['bbox'].numpy()  # only in test mode
+            shift_amount = data['st_pix']
+            outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount.numpy()
+            outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount.numpy()
+            merged_bboxs.append(outs['bbox'])
+
+            if data['is_last'] > 0:
+                # merge matching predictions
+                merged_results = {'bbox': []}
+                if combine_method == 'nms':
+                    final_boxes = multiclass_nms(
+                        np.concatenate(merged_bboxs), self.cfg.num_classes,
+                        match_threshold, match_metric)
+                    merged_results['bbox'] = np.concatenate(final_boxes)
+                elif combine_method == 'concat':
+                    merged_results['bbox'] = np.concatenate(merged_bboxs)
+                else:
+                    raise ValueError(
+                        "Now only support 'nms' or 'concat' to fuse detection results."
+                    )
+                merged_results['im_id'] = np.array([[0]])
+                merged_results['bbox_num'] = np.array(
+                    [len(merged_results['bbox'])])
+
+                merged_bboxs = []
+                data['im_id'] = data['ori_im_id']
+
+                for key in ['im_shape', 'scale_factor', 'im_id']:
+                    if isinstance(data, typing.Sequence):
+                        merged_results[key] = data[0][key]
+                    else:
+                        merged_results[key] = data[key]
+                for key, value in merged_results.items():
+                    if hasattr(value, 'numpy'):
+                        merged_results[key] = value.numpy()
+                results.append(merged_results)
+
+        if visualize:
+            for outs in results:
+                batch_res = get_infer_results(outs, clsid2catid)
+                bbox_num = outs['bbox_num']
+                start = 0
+                for i, im_id in enumerate(outs['im_id']):
+                    image_path = imid2path[int(im_id)]
+                    image = Image.open(image_path).convert('RGB')
+                    image = ImageOps.exif_transpose(image)
+                    self.status['original_image'] = np.array(image.copy())
+                    end = start + bbox_num[i]
+                    bbox_res = batch_res['bbox'][start:end] \
+                            if 'bbox' in batch_res else None
+                    mask_res, segm_res, keypoint_res = None, None, None
+                    image = visualize_results(
+                        image, bbox_res, mask_res, segm_res, keypoint_res,
+                        int(im_id), catid2name, draw_threshold)
+                    self.status['result_image'] = np.array(image.copy())
+                    if self._compose_callback:
+                        self._compose_callback.on_step_end(self.status)
+                    # save image with detection
+                    save_name = self._get_save_image_name(output_dir,
+                                                          image_path)
+                    logger.info("Detection bbox results save in {}".format(
+                        save_name))
+                    image.save(save_name, quality=95)
+                    start = end
+
     def predict(self,
     def predict(self,
                 images,
                 images,
                 draw_threshold=0.5,
                 draw_threshold=0.5,
                 output_dir='output',
                 output_dir='output',
-                save_txt=False):
+                save_results=False,
+                visualize=True):
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
+
         self.dataset.set_images(images)
         self.dataset.set_images(images)
         loader = create('TestReader')(self.dataset, 0)
         loader = create('TestReader')(self.dataset, 0)
 
 
         imid2path = self.dataset.get_imid2path()
         imid2path = self.dataset.get_imid2path()
 
 
+        def setup_metrics_for_loader():
+            # mem
+            metrics = copy.deepcopy(self._metrics)
+            mode = self.mode
+            save_prediction_only = self.cfg[
+                'save_prediction_only'] if 'save_prediction_only' in self.cfg else None
+            output_eval = self.cfg[
+                'output_eval'] if 'output_eval' in self.cfg else None
+
+            # modify
+            self.mode = '_test'
+            self.cfg['save_prediction_only'] = True
+            self.cfg['output_eval'] = output_dir
+            self.cfg['imid2path'] = imid2path
+            self._init_metrics()
+
+            # restore
+            self.mode = mode
+            self.cfg.pop('save_prediction_only')
+            if save_prediction_only is not None:
+                self.cfg['save_prediction_only'] = save_prediction_only
+
+            self.cfg.pop('output_eval')
+            if output_eval is not None:
+                self.cfg['output_eval'] = output_eval
+
+            self.cfg.pop('imid2path')
+
+            _metrics = copy.deepcopy(self._metrics)
+            self._metrics = metrics
+
+            return _metrics
+
+        if save_results:
+            metrics = setup_metrics_for_loader()
+        else:
+            metrics = []
+
         anno_file = self.dataset.get_anno()
         anno_file = self.dataset.get_anno()
         clsid2catid, catid2name = get_categories(
         clsid2catid, catid2name = get_categories(
             self.cfg.metric, anno_file=anno_file)
             self.cfg.metric, anno_file=anno_file)
 
 
-        # Run Infer
+        # Run Infer 
         self.status['mode'] = 'test'
         self.status['mode'] = 'test'
         self.model.eval()
         self.model.eval()
         if self.cfg.get('print_flops', False):
         if self.cfg.get('print_flops', False):
             flops_loader = create('TestReader')(self.dataset, 0)
             flops_loader = create('TestReader')(self.dataset, 0)
             self._flops(flops_loader)
             self._flops(flops_loader)
         results = []
         results = []
-        for step_id, data in enumerate(loader):
+        for step_id, data in enumerate(tqdm(loader)):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id
             # forward
             # forward
             outs = self.model(data)
             outs = self.model(data)
 
 
+            for _m in metrics:
+                _m.update(data, outs)
+
             for key in ['im_shape', 'scale_factor', 'im_id']:
             for key in ['im_shape', 'scale_factor', 'im_id']:
                 if isinstance(data, typing.Sequence):
                 if isinstance(data, typing.Sequence):
                     outs[key] = data[0][key]
                     outs[key] = data[0][key]
@@ -548,64 +890,64 @@ class Trainer(object):
                 if hasattr(value, 'numpy'):
                 if hasattr(value, 'numpy'):
                     outs[key] = value.numpy()
                     outs[key] = value.numpy()
             results.append(outs)
             results.append(outs)
+
         # sniper
         # sniper
         if type(self.dataset) == SniperCOCODataSet:
         if type(self.dataset) == SniperCOCODataSet:
             results = self.dataset.anno_cropper.aggregate_chips_detections(
             results = self.dataset.anno_cropper.aggregate_chips_detections(
                 results)
                 results)
 
 
-        for outs in results:
-            batch_res = get_infer_results(outs, clsid2catid)
-            bbox_num = outs['bbox_num']
-
-            start = 0
-            for i, im_id in enumerate(outs['im_id']):
-                image_path = imid2path[int(im_id)]
-                image = Image.open(image_path).convert('RGB')
-                image = ImageOps.exif_transpose(image)
-                self.status['original_image'] = np.array(image.copy())
-
-                end = start + bbox_num[i]
-                bbox_res = batch_res['bbox'][start:end] \
-                        if 'bbox' in batch_res else None
-                mask_res = batch_res['mask'][start:end] \
-                        if 'mask' in batch_res else None
-                segm_res = batch_res['segm'][start:end] \
-                        if 'segm' in batch_res else None
-                keypoint_res = batch_res['keypoint'][start:end] \
-                        if 'keypoint' in batch_res else None
-                image = visualize_results(
-                    image, bbox_res, mask_res, segm_res, keypoint_res,
-                    int(im_id), catid2name, draw_threshold)
-                self.status['result_image'] = np.array(image.copy())
-                if self._compose_callback:
-                    self._compose_callback.on_step_end(self.status)
-                # save image with detection
-                save_name = self._get_save_image_name(output_dir, image_path)
-                logger.info("Detection bbox results save in {}".format(
-                    save_name))
-                image.save(save_name, quality=95)
-                if save_txt:
-                    save_path = os.path.splitext(save_name)[0] + '.txt'
-                    results = {}
-                    results["im_id"] = im_id
-                    if bbox_res:
-                        results["bbox_res"] = bbox_res
-                    if keypoint_res:
-                        results["keypoint_res"] = keypoint_res
-                    save_result(save_path, results, catid2name, draw_threshold)
-                start = end
+        for _m in metrics:
+            _m.accumulate()
+            _m.reset()
+
+        if visualize:
+            for outs in results:
+                batch_res = get_infer_results(outs, clsid2catid)
+                bbox_num = outs['bbox_num']
+
+                start = 0
+                for i, im_id in enumerate(outs['im_id']):
+                    image_path = imid2path[int(im_id)]
+                    image = Image.open(image_path).convert('RGB')
+                    image = ImageOps.exif_transpose(image)
+                    self.status['original_image'] = np.array(image.copy())
+
+                    end = start + bbox_num[i]
+                    bbox_res = batch_res['bbox'][start:end] \
+                            if 'bbox' in batch_res else None
+                    mask_res = batch_res['mask'][start:end] \
+                            if 'mask' in batch_res else None
+                    segm_res = batch_res['segm'][start:end] \
+                            if 'segm' in batch_res else None
+                    keypoint_res = batch_res['keypoint'][start:end] \
+                            if 'keypoint' in batch_res else None
+                    image = visualize_results(
+                        image, bbox_res, mask_res, segm_res, keypoint_res,
+                        int(im_id), catid2name, draw_threshold)
+                    self.status['result_image'] = np.array(image.copy())
+                    if self._compose_callback:
+                        self._compose_callback.on_step_end(self.status)
+                    # save image with detection
+                    save_name = self._get_save_image_name(output_dir,
+                                                          image_path)
+                    logger.info("Detection bbox results save in {}".format(
+                        save_name))
+                    image.save(save_name, quality=95)
+
+                    start = end
 
 
     def _get_save_image_name(self, output_dir, image_path):
     def _get_save_image_name(self, output_dir, image_path):
         """
         """
         Get save image name from source image path.
         Get save image name from source image path.
         """
         """
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
         image_name = os.path.split(image_path)[-1]
         image_name = os.path.split(image_path)[-1]
         name, ext = os.path.splitext(image_name)
         name, ext = os.path.splitext(image_name)
         return os.path.join(output_dir, "{}".format(name)) + ext
         return os.path.join(output_dir, "{}".format(name)) + ext
 
 
-    def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
+    def _get_infer_cfg_and_input_spec(self,
+                                      save_dir,
+                                      prune_input=True,
+                                      kl_quant=False):
         image_shape = None
         image_shape = None
         im_shape = [None, 2]
         im_shape = [None, 2]
         scale_factor = [None, 2]
         scale_factor = [None, 2]
@@ -628,9 +970,27 @@ class Trainer(object):
 
 
         if hasattr(self.model, 'deploy'):
         if hasattr(self.model, 'deploy'):
             self.model.deploy = True
             self.model.deploy = True
+
+        if 'slim' not in self.cfg:
+            for layer in self.model.sublayers():
+                if hasattr(layer, 'convert_to_deploy'):
+                    layer.convert_to_deploy()
+
+        export_post_process = self.cfg['export'].get(
+            'post_process', False) if hasattr(self.cfg, 'export') else True
+        export_nms = self.cfg['export'].get('nms', False) if hasattr(
+            self.cfg, 'export') else True
+        export_benchmark = self.cfg['export'].get(
+            'benchmark', False) if hasattr(self.cfg, 'export') else False
         if hasattr(self.model, 'fuse_norm'):
         if hasattr(self.model, 'fuse_norm'):
             self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
             self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
                                                               False)
                                                               False)
+        if hasattr(self.model, 'export_post_process'):
+            self.model.export_post_process = export_post_process if not export_benchmark else False
+        if hasattr(self.model, 'export_nms'):
+            self.model.export_nms = export_nms if not export_benchmark else False
+        if export_post_process and not export_benchmark:
+            image_shape = [None] + image_shape[1:]
 
 
         # Save infer cfg
         # Save infer cfg
         _dump_infer_config(self.cfg,
         _dump_infer_config(self.cfg,
@@ -663,16 +1023,34 @@ class Trainer(object):
             pruned_input_spec = input_spec
             pruned_input_spec = input_spec
 
 
         # TODO: Hard code, delete it when support prune input_spec.
         # TODO: Hard code, delete it when support prune input_spec.
-        if self.cfg.architecture == 'PicoDet':
+        if self.cfg.architecture == 'PicoDet' and not export_post_process:
             pruned_input_spec = [{
             pruned_input_spec = [{
                 "image": InputSpec(
                 "image": InputSpec(
                     shape=image_shape, name='image')
                     shape=image_shape, name='image')
             }]
             }]
+        if kl_quant:
+            if self.cfg.architecture == 'PicoDet' or 'ppyoloe' in self.cfg.weights:
+                pruned_input_spec = [{
+                    "image": InputSpec(
+                        shape=image_shape, name='image'),
+                    "scale_factor": InputSpec(
+                        shape=scale_factor, name='scale_factor')
+                }]
+            elif 'tinypose' in self.cfg.weights:
+                pruned_input_spec = [{
+                    "image": InputSpec(
+                        shape=image_shape, name='image')
+                }]
 
 
         return static_model, pruned_input_spec
         return static_model, pruned_input_spec
 
 
     def export(self, output_dir='output_inference'):
     def export(self, output_dir='output_inference'):
         self.model.eval()
         self.model.eval()
+
+        if hasattr(self.cfg, 'export') and 'fuse_conv_bn' in self.cfg[
+                'export'] and self.cfg['export']['fuse_conv_bn']:
+            self.model = fuse_conv_bn(self.model)
+
         model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
         model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
         save_dir = os.path.join(output_dir, model_name)
         save_dir = os.path.join(output_dir, model_name)
         if not os.path.exists(save_dir):
         if not os.path.exists(save_dir):
@@ -682,7 +1060,7 @@ class Trainer(object):
             save_dir)
             save_dir)
 
 
         # dy2st and save model
         # dy2st and save model
-        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+        if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
             paddle.jit.save(
             paddle.jit.save(
                 static_model,
                 static_model,
                 os.path.join(save_dir, 'model'),
                 os.path.join(save_dir, 'model'),
@@ -706,8 +1084,9 @@ class Trainer(object):
                 break
                 break
 
 
         # TODO: support prune input_spec
         # TODO: support prune input_spec
+        kl_quant = True if hasattr(self.cfg.slim, 'ptq') else False
         _, pruned_input_spec = self._get_infer_cfg_and_input_spec(
         _, pruned_input_spec = self._get_infer_cfg_and_input_spec(
-            save_dir, prune_input=False)
+            save_dir, prune_input=False, kl_quant=kl_quant)
 
 
         self.cfg.slim.save_quantized_model(
         self.cfg.slim.save_quantized_model(
             self.model,
             self.model,
@@ -739,3 +1118,29 @@ class Trainer(object):
         flops = flops(self.model, input_spec) / (1000**3)
         flops = flops(self.model, input_spec) / (1000**3)
         logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
         logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
             flops, input_data['image'][0].unsqueeze(0).shape))
             flops, input_data['image'][0].unsqueeze(0).shape))
+
+    def parse_mot_images(self, cfg):
+        import glob
+        # for quant
+        dataset_dir = cfg['EvalMOTDataset'].dataset_dir
+        data_root = cfg['EvalMOTDataset'].data_root
+        data_root = '{}/{}'.format(dataset_dir, data_root)
+        seqs = os.listdir(data_root)
+        seqs.sort()
+        all_images = []
+        for seq in seqs:
+            infer_dir = os.path.join(data_root, seq)
+            assert infer_dir is None or os.path.isdir(infer_dir), \
+                "{} is not a directory".format(infer_dir)
+            images = set()
+            exts = ['jpg', 'jpeg', 'png', 'bmp']
+            exts += [ext.upper() for ext in exts]
+            for ext in exts:
+                images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+            images = list(images)
+            images.sort()
+            assert len(images) > 0, "no image found in {}".format(infer_dir)
+            all_images.extend(images)
+            logger.info("Found {} inference images in total.".format(
+                len(images)))
+        return all_images

+ 35 - 0
paddlers/models/ppdet/ext_op/README.md

@@ -0,0 +1,35 @@
+# 自定义OP编译
+旋转框IOU计算OP参考[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html)文档实现。
+
+## 1. 环境依赖
+- Paddle >= 2.0.1
+- gcc 8.2
+
+## 2. 安装
+```
+python setup.py install
+```
+
+编译完成后即可使用,以下为`rbox_iou`的使用示例:
+```
+# 引入自定义op
+from ext_op import rbox_iou
+
+paddle.set_device('gpu:0')
+paddle.disable_static()
+
+rbox1 = np.random.rand(13000, 5)
+rbox2 = np.random.rand(7, 5)
+
+pd_rbox1 = paddle.to_tensor(rbox1)
+pd_rbox2 = paddle.to_tensor(rbox2)
+
+iou = rbox_iou(pd_rbox1, pd_rbox2)
+print('iou', iou)
+```
+
+## 3. 单元测试
+可以通过执行单元测试来确认自定义算子功能的正确性,执行单元测试的示例如下所示:
+```
+python unittest/test_matched_rbox_iou.py
+```

+ 90 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc

@@ -0,0 +1,90 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+template <typename T>
+void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
+                            const T *rbox2_data_ptr, T *output_data_ptr) {
+
+  int i;
+  for (i = 0; i < rbox_num; i++) {
+    output_data_ptr[i] =
+        rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + i * 5);
+  }
+}
+
+#define CHECK_INPUT_CPU(x)                                                     \
+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
+                                                 const paddle::Tensor &rbox2) {
+  CHECK_INPUT_CPU(rbox1);
+  CHECK_INPUT_CPU(rbox2);
+  PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
+
+  auto rbox_num = rbox1.shape()[0];
+  auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
+
+  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rotated_iou_cpu_kernel", ([&] {
+                               matched_rbox_iou_cpu_kernel<data_t>(
+                                   rbox_num, rbox1.data<data_t>(),
+                                   rbox2.data<data_t>(),
+                                   output.mutable_data<data_t>());
+                             }));
+
+  return {output};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                                                  const paddle::Tensor &rbox2);
+#endif
+
+#define CHECK_INPUT_SAME(x1, x2)                                               \
+  PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
+
+std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
+                                              const paddle::Tensor &rbox2) {
+  CHECK_INPUT_SAME(rbox1, rbox2);
+  if (rbox1.place() == paddle::PlaceType::kCPU) {
+    return MatchedRboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+  } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+    return MatchedRboxIouCUDAForward(rbox1, rbox2);
+#endif
+  }
+}
+
+std::vector<std::vector<int64_t>>
+MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape,
+                     std::vector<int64_t> rbox2_shape) {
+  return {{rbox1_shape[0]}};
+}
+
+std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1,
+                                                   paddle::DataType t2) {
+  return {t1};
+}
+
+PD_BUILD_OP(matched_rbox_iou)
+    .Inputs({"RBOX1", "RBOX2"})
+    .Outputs({"Output"})
+    .SetKernelFn(PD_KERNEL(MatchedRboxIouForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(MatchedRboxIouInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(MatchedRboxIouInferDtype));

+ 63 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu

@@ -0,0 +1,63 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+/**
+   Computes ceil(a / b)
+*/
+
+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
+
+template <typename T>
+__global__ void
+matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
+                        const T *rbox2_data_ptr, T *output_data_ptr) {
+  for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num;
+       tid += blockDim.x * gridDim.x) {
+    output_data_ptr[tid] =
+        rbox_iou_single<T>(rbox1_data_ptr + tid * 5, rbox2_data_ptr + tid * 5);
+  }
+}
+
+#define CHECK_INPUT_GPU(x)                                                     \
+  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+
+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                                                  const paddle::Tensor &rbox2) {
+  CHECK_INPUT_GPU(rbox1);
+  CHECK_INPUT_GPU(rbox2);
+  PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
+
+  auto rbox_num = rbox1.shape()[0];
+
+  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
+
+  const int thread_per_block = 512;
+  const int block_per_grid = CeilDiv(rbox_num, thread_per_block);
+
+  PD_DISPATCH_FLOATING_TYPES(
+      rbox1.type(), "matched_rbox_iou_cuda_kernel", ([&] {
+        matched_rbox_iou_cuda_kernel<
+            data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>(
+            rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
+            output.mutable_data<data_t>());
+      }));
+
+  return {output};
+}

+ 97 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc

@@ -0,0 +1,97 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "rbox_iou_op.h"
+#include "paddle/extension.h"
+
+
+template <typename T>
+void rbox_iou_cpu_kernel(
+    const int rbox1_num,
+    const int rbox2_num,
+    const T* rbox1_data_ptr,
+    const T* rbox2_data_ptr,
+    T* output_data_ptr) {
+
+    int i, j;
+    for (i = 0; i < rbox1_num; i++) {
+        for (j = 0; j < rbox2_num; j++) {
+		int offset = i * rbox2_num + j;
+		output_data_ptr[offset] = rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
+        }
+    }
+}
+
+
+#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
+    CHECK_INPUT_CPU(rbox1);
+    CHECK_INPUT_CPU(rbox2);
+
+    auto rbox1_num = rbox1.shape()[0];
+    auto rbox2_num = rbox2.shape()[0];
+
+    auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num});
+
+    PD_DISPATCH_FLOATING_TYPES(
+        rbox1.type(),
+        "rbox_iou_cpu_kernel",
+        ([&] {
+            rbox_iou_cpu_kernel<data_t>(
+                rbox1_num,
+                rbox2_num,
+                rbox1.data<data_t>(),
+                rbox2.data<data_t>(),
+                output.mutable_data<data_t>());
+        }));
+    
+    return {output};
+}
+
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
+#endif
+
+
+#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
+
+std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
+    CHECK_INPUT_SAME(rbox1, rbox2);
+    if (rbox1.place() == paddle::PlaceType::kCPU) {
+        return RboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+    } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+        return RboxIouCUDAForward(rbox1, rbox2);
+#endif
+    }
+}
+
+std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) {
+    return {{rbox1_shape[0], rbox2_shape[0]}};
+}
+
+std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) {
+    return {t1};
+}
+
+PD_BUILD_OP(rbox_iou)
+    .Inputs({"RBOX1", "RBOX2"})
+    .Outputs({"Output"})
+    .SetKernelFn(PD_KERNEL(RboxIouForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));

+ 114 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu

@@ -0,0 +1,114 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#include "paddle/extension.h"
+#include "rbox_iou_op.h"
+
+// 2D block with 32 * 16 = 512 threads per block
+const int BLOCK_DIM_X = 32;
+const int BLOCK_DIM_Y = 16;
+
+/**
+   Computes ceil(a / b)
+*/
+
+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
+
+template <typename T>
+__global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
+                                     const T *rbox1_data_ptr,
+                                     const T *rbox2_data_ptr,
+                                     T *output_data_ptr) {
+
+  // get row_start and col_start
+  const int rbox1_block_idx = blockIdx.x * blockDim.x;
+  const int rbox2_block_idx = blockIdx.y * blockDim.y;
+
+  const int rbox1_thread_num = min(rbox1_num - rbox1_block_idx, blockDim.x);
+  const int rbox2_thread_num = min(rbox2_num - rbox2_block_idx, blockDim.y);
+
+  __shared__ T block_boxes1[BLOCK_DIM_X * 5];
+  __shared__ T block_boxes2[BLOCK_DIM_Y * 5];
+
+  // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y
+  if (threadIdx.x < rbox1_thread_num && threadIdx.y == 0) {
+    block_boxes1[threadIdx.x * 5 + 0] =
+        rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 0];
+    block_boxes1[threadIdx.x * 5 + 1] =
+        rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 1];
+    block_boxes1[threadIdx.x * 5 + 2] =
+        rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 2];
+    block_boxes1[threadIdx.x * 5 + 3] =
+        rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 3];
+    block_boxes1[threadIdx.x * 5 + 4] =
+        rbox1_data_ptr[(rbox1_block_idx + threadIdx.x) * 5 + 4];
+  }
+
+  // rbox2_thread_num <= BLOCK_DIM_Y <= BLOCK_DIM_X, so threadIdx.x can index
+  // this copy as well; reuse the same threadIdx.y == 0 row as above
+  if (threadIdx.x < rbox2_thread_num && threadIdx.y == 0) {
+    block_boxes2[threadIdx.x * 5 + 0] =
+        rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 0];
+    block_boxes2[threadIdx.x * 5 + 1] =
+        rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 1];
+    block_boxes2[threadIdx.x * 5 + 2] =
+        rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 2];
+    block_boxes2[threadIdx.x * 5 + 3] =
+        rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 3];
+    block_boxes2[threadIdx.x * 5 + 4] =
+        rbox2_data_ptr[(rbox2_block_idx + threadIdx.x) * 5 + 4];
+  }
+
+  // sync
+  __syncthreads();
+
+  if (threadIdx.x < rbox1_thread_num && threadIdx.y < rbox2_thread_num) {
+    int offset = (rbox1_block_idx + threadIdx.x) * rbox2_num + rbox2_block_idx +
+                 threadIdx.y;
+    output_data_ptr[offset] = rbox_iou_single<T>(
+        block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5);
+  }
+}
+
+#define CHECK_INPUT_GPU(x)                                                     \
+  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+
+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
+                                               const paddle::Tensor &rbox2) {
+  CHECK_INPUT_GPU(rbox1);
+  CHECK_INPUT_GPU(rbox2);
+
+  auto rbox1_num = rbox1.shape()[0];
+  auto rbox2_num = rbox2.shape()[0];
+
+  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num});
+
+  const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X);
+  const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y);
+
+  dim3 blocks(blocks_x, blocks_y);
+  dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+
+  PD_DISPATCH_FLOATING_TYPES(
+      rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
+        rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>(
+            rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
+            output.mutable_data<data_t>());
+      }));
+
+  return {output};
+}

+ 348 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h

@@ -0,0 +1,348 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+#include <vector>
+
+#ifdef __CUDACC__
+// Designates functions callable from the host (CPU) and the device (GPU)
+#define HOST_DEVICE __host__ __device__
+#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
+#else
+#include <algorithm>
+#define HOST_DEVICE
+#define HOST_DEVICE_INLINE HOST_DEVICE inline
+#endif
+
+namespace {
+
+// Rotated rectangle: center (x_ctr, y_ctr), side lengths w and h, and angle a.
+// The angle is passed straight to cos()/sin() by get_rotated_vertices, i.e. it
+// is interpreted in radians.
+template <typename T> struct RotatedBox { T x_ctr, y_ctr, w, h, a; };
+
+// Minimal 2-D point / vector usable from both host and device code.
+template <typename T> struct Point {
+  T x, y;
+  // Both coordinates default to 0, so Point<T> arrays can be declared without
+  // explicit initializers.
+  HOST_DEVICE_INLINE Point(const T &px = 0, const T &py = 0) : x(px), y(py) {}
+  // Component-wise sum.
+  HOST_DEVICE_INLINE Point operator+(const Point &p) const {
+    return Point(x + p.x, y + p.y);
+  }
+  // In-place component-wise sum; returns *this for chaining.
+  HOST_DEVICE_INLINE Point &operator+=(const Point &p) {
+    x += p.x;
+    y += p.y;
+    return *this;
+  }
+  // Component-wise difference.
+  HOST_DEVICE_INLINE Point operator-(const Point &p) const {
+    return Point(x - p.x, y - p.y);
+  }
+  // Scales both coordinates by `coeff`.
+  HOST_DEVICE_INLINE Point operator*(const T coeff) const {
+    return Point(x * coeff, y * coeff);
+  }
+};
+
+// Dot product of the 2-D vectors A and B.
+template <typename T>
+HOST_DEVICE_INLINE T dot_2d(const Point<T> &A, const Point<T> &B) {
+  return A.x * B.x + A.y * B.y;
+}
+
+// Scalar (z-component) cross product of the 2-D vectors A and B:
+// A.x * B.y - B.x * A.y. Its sign flips when the arguments are swapped.
+template <typename T>
+HOST_DEVICE_INLINE T cross_2d(const Point<T> &A, const Point<T> &B) {
+  return A.x * B.y - B.x * A.y;
+}
+
+// Writes the four corner points of `box` into `pts`.
+// pts[2] and pts[3] are the reflections of pts[0] and pts[1] through the box
+// center, so consecutive entries walk around the rectangle's boundary.
+template <typename T>
+HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T> &box,
+                                             Point<T> (&pts)[4]) {
+  // M_PI / 180. == 0.01745329251
+  // double theta = box.a * 0.01745329251;
+  // MODIFIED
+  // The degree-to-radian conversion above is disabled: box.a is used as-is,
+  // so callers must supply the angle in radians.
+  double theta = box.a;
+  // Half cosine / half sine: multiplying by w or h below directly yields the
+  // half-extent contributions.
+  T cosTheta2 = (T)cos(theta) * 0.5f;
+  T sinTheta2 = (T)sin(theta) * 0.5f;
+
+  // y: top --> down; x: left --> right
+  pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
+  pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
+  pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
+  pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
+  // Opposite corners: mirror through the center (2 * center - corner).
+  pts[2].x = 2 * box.x_ctr - pts[0].x;
+  pts[2].y = 2 * box.y_ctr - pts[0].y;
+  pts[3].x = 2 * box.x_ctr - pts[1].x;
+  pts[3].y = 2 * box.y_ctr - pts[1].y;
+}
+
+// Collects candidate vertices of the overlap region of two rectangles given
+// by their corner lists `pts1` and `pts2`.
+//
+// Candidates come from three sources, appended to `intersections` in order:
+//   1. edge/edge crossings (up to 4 x 4 = 16),
+//   2. corners of rect1 lying inside (or on the border of) rect2 (up to 4),
+//   3. corners of rect2 lying inside (or on the border of) rect1 (up to 4),
+// hence the capacity of 24. Duplicates are not removed here.
+// Returns the number of points written.
+template <typename T>
+HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
+                                               const Point<T> (&pts2)[4],
+                                               Point<T> (&intersections)[24]) {
+  // Line vector
+  // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
+  Point<T> vec1[4], vec2[4];
+  for (int i = 0; i < 4; i++) {
+    vec1[i] = pts1[(i + 1) % 4] - pts1[i];
+    vec2[i] = pts2[(i + 1) % 4] - pts2[i];
+  }
+
+  // Line test - test all line combos for intersection
+  int num = 0; // number of intersections
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      // Solve for 2x2 Ax=b
+      T det = cross_2d<T>(vec2[j], vec1[i]);
+
+      // This takes care of parallel lines
+      if (fabs(det) <= 1e-14) {
+        continue;
+      }
+
+      auto vec12 = pts2[j] - pts1[i];
+
+      // t1 / t2 are the line parameters along edge i of rect1 and edge j of
+      // rect2; the segments cross only if both lie in [0, 1].
+      T t1 = cross_2d<T>(vec2[j], vec12) / det;
+      T t2 = cross_2d<T>(vec1[i], vec12) / det;
+
+      if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
+        intersections[num++] = pts1[i] + vec1[i] * t1;
+      }
+    }
+  }
+
+  // Check for vertices of rect1 inside rect2
+  {
+    const auto &AB = vec2[0];
+    const auto &DA = vec2[3];
+    auto ABdotAB = dot_2d<T>(AB, AB);
+    auto ADdotAD = dot_2d<T>(DA, DA);
+    for (int i = 0; i < 4; i++) {
+      // assume ABCD is the rectangle, and P is the point to be judged
+      // P is inside ABCD iff. P's projection on AB lies within AB
+      // and P's projection on AD lies within AD
+
+      auto AP = pts1[i] - pts2[0];
+
+      auto APdotAB = dot_2d<T>(AP, AB);
+      // DA points from D to A, so negate to project onto AD.
+      auto APdotAD = -dot_2d<T>(AP, DA);
+
+      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+          (APdotAD <= ADdotAD)) {
+        intersections[num++] = pts1[i];
+      }
+    }
+  }
+
+  // Reverse the check - check for vertices of rect2 inside rect1
+  {
+    const auto &AB = vec1[0];
+    const auto &DA = vec1[3];
+    auto ABdotAB = dot_2d<T>(AB, AB);
+    auto ADdotAD = dot_2d<T>(DA, DA);
+    for (int i = 0; i < 4; i++) {
+      auto AP = pts2[i] - pts1[0];
+
+      auto APdotAB = dot_2d<T>(AP, AB);
+      auto APdotAD = -dot_2d<T>(AP, DA);
+
+      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+          (APdotAD <= ADdotAD)) {
+        intersections[num++] = pts2[i];
+      }
+    }
+  }
+
+  return num;
+}
+
+// Graham scan over the first `num_in` points of `p`.
+//
+// Writes the hull vertices, in scan order, to the front of `q` and returns
+// how many were written (1 when all input points coincide).
+//
+// Parameters:
+//   p             - input points; only p[0 .. num_in) is read.
+//   num_in        - number of valid input points, must be >= 2.
+//   q             - output buffer; also used as scratch space for sorting.
+//   shift_to_zero - when true the hull is returned relative to the starting
+//                   point (lowest y, then lowest x), which is sufficient for
+//                   area computations; when false the original coordinates
+//                   are restored.
+template <typename T>
+HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
+                                          const int &num_in, Point<T> (&q)[24],
+                                          bool shift_to_zero = false) {
+  assert(num_in >= 2);
+
+  // Step 1:
+  // Find point with minimum y
+  // if more than 1 points have the same minimum y,
+  // pick the one with the minimum x.
+  int t = 0;
+  for (int i = 1; i < num_in; i++) {
+    if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
+      t = i;
+    }
+  }
+  auto &start = p[t]; // starting point
+
+  // Step 2:
+  // Subtract starting point from every points (for sorting in the next step)
+  for (int i = 0; i < num_in; i++) {
+    q[i] = p[i] - start;
+  }
+
+  // Swap the starting point to position 0
+  auto tmp = q[0];
+  q[0] = q[t];
+  q[t] = tmp;
+
+  // Step 3:
+  // Sort point 1 ~ num_in according to their relative cross-product values
+  // (essentially sorting according to angles)
+  // If the angles are the same, sort according to their distance to origin
+  T dist[24];
+  for (int i = 0; i < num_in; i++) {
+    dist[i] = dot_2d<T>(q[i], q[i]);
+  }
+
+#ifdef __CUDACC__
+  // CUDA version
+  // In the future, we can potentially use thrust
+  // for sorting here to improve speed (though not guaranteed)
+  // dist[] is swapped together with q[], so it stays aligned with q[].
+  for (int i = 1; i < num_in - 1; i++) {
+    for (int j = i + 1; j < num_in; j++) {
+      T crossProduct = cross_2d<T>(q[i], q[j]);
+      if ((crossProduct < -1e-6) ||
+          (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
+        auto q_tmp = q[i];
+        q[i] = q[j];
+        q[j] = q_tmp;
+        auto dist_tmp = dist[i];
+        dist[i] = dist[j];
+        dist[j] = dist_tmp;
+      }
+    }
+  }
+#else
+  // CPU version
+  std::sort(q + 1, q + num_in,
+            [](const Point<T> &A, const Point<T> &B) -> bool {
+              T temp = cross_2d<T>(A, B);
+              if (fabs(temp) < 1e-6) {
+                return dot_2d<T>(A, A) < dot_2d<T>(B, B);
+              } else {
+                return temp > 0;
+              }
+            });
+  // std::sort permuted q[] but not dist[]; recompute the distances so that
+  // dist[i] describes q[i] again. Without this, Step 4 would inspect stale
+  // values and could pick the wrong second hull point.
+  for (int i = 0; i < num_in; i++) {
+    dist[i] = dot_2d<T>(q[i], q[i]);
+  }
+#endif
+
+  // Step 4:
+  // Make sure there are at least 2 points (that don't overlap with each other)
+  // in the stack
+  int k; // index of the non-overlapped second point
+  for (k = 1; k < num_in; k++) {
+    if (dist[k] > 1e-8) {
+      break;
+    }
+  }
+  if (k == num_in) {
+    // We reach the end, which means the convex hull is just one point
+    q[0] = p[t];
+    return 1;
+  }
+  q[1] = q[k];
+  int m = 2; // 2 points in the stack
+  // Step 5:
+  // Finally we can start the scanning process.
+  // When a non-convex relationship between the 3 points is found
+  // (either concave shape or duplicated points),
+  // we pop the previous point from the stack
+  // until the 3-point relationship is convex again, or
+  // until the stack only contains two points
+  for (int i = k + 1; i < num_in; i++) {
+    while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
+      m--;
+    }
+    q[m++] = q[i];
+  }
+
+  // Step 6 (Optional):
+  // In general sense we need the original coordinates, so we
+  // need to shift the points back (reverting Step 2)
+  // But if we're only interested in getting the area/perimeter of the shape
+  // We can simply return.
+  if (!shift_to_zero) {
+    for (int i = 0; i < m; i++) {
+      q[i] += start;
+    }
+  }
+
+  return m;
+}
+
+// Area of the polygon formed by the first `m` points of `q`.
+// Uses a triangle fan anchored at q[0]: sums |cross| of each (q[i], q[i+1])
+// pair relative to q[0] and halves the total. Fewer than 3 points -> 0.
+template <typename T>
+HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int &m) {
+  if (m <= 2) {
+    return 0;
+  }
+
+  T area = 0;
+  for (int i = 1; i < m - 1; i++) {
+    area += fabs(cross_2d<T>(q[i] - q[0], q[i + 1] - q[0]));
+  }
+
+  return area / 2.0;
+}
+
+// Area of the overlap between two rotated boxes.
+// Pipeline: corners -> candidate overlap vertices -> convex hull -> area.
+template <typename T>
+HOST_DEVICE_INLINE T rboxes_intersection(const RotatedBox<T> &box1,
+                                         const RotatedBox<T> &box2) {
+  // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
+  // from get_intersection_points
+  Point<T> intersectPts[24], orderedPts[24];
+
+  Point<T> pts1[4];
+  Point<T> pts2[4];
+  get_rotated_vertices<T>(box1, pts1);
+  get_rotated_vertices<T>(box2, pts2);
+
+  int num = get_intersection_points<T>(pts1, pts2, intersectPts);
+
+  // Two or fewer candidate points cannot enclose any area.
+  if (num <= 2) {
+    return 0.0;
+  }
+
+  // Convex Hull to order the intersection points in clockwise order and find
+  // the contour area.
+  // shift_to_zero = true: the hull stays relative to its starting point,
+  // which does not change the area.
+  int num_convex = convex_hull_graham<T>(intersectPts, num, orderedPts, true);
+  return polygon_area<T>(orderedPts, num_convex);
+}
+
+} // namespace
+
+// IoU of two rotated boxes given as raw 5-element arrays laid out as
+// [x_ctr, y_ctr, w, h, angle].
+// Returns 0 when either box has an area below 1e-14.
+template <typename T>
+HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw,
+                                     T const *const box2_raw) {
+  // shift center to the middle point to achieve higher precision in result
+  RotatedBox<T> box1, box2;
+  auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
+  auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
+  box1.x_ctr = box1_raw[0] - center_shift_x;
+  box1.y_ctr = box1_raw[1] - center_shift_y;
+  box1.w = box1_raw[2];
+  box1.h = box1_raw[3];
+  box1.a = box1_raw[4];
+  box2.x_ctr = box2_raw[0] - center_shift_x;
+  box2.y_ctr = box2_raw[1] - center_shift_y;
+  box2.w = box2_raw[2];
+  box2.h = box2_raw[3];
+  box2.a = box2_raw[4];
+
+  // Degenerate boxes are treated as non-overlapping; this also keeps the
+  // union in the denominator below away from zero.
+  const T area1 = box1.w * box1.h;
+  const T area2 = box2.w * box2.h;
+  if (area1 < 1e-14 || area2 < 1e-14) {
+    return 0.f;
+  }
+
+  // IoU = intersection / union, with union = area1 + area2 - intersection.
+  const T intersection = rboxes_intersection<T>(box1, box2);
+  const T iou = intersection / (area1 + area2 - intersection);
+  return iou;
+}

+ 33 - 0
paddlers/models/ppdet/ext_op/setup.py

@@ -0,0 +1,33 @@
+import os
+import glob
+import paddle
+from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
+
+
+def get_extensions():
+    root_dir = os.path.dirname(os.path.abspath(__file__))
+    ext_root_dir = os.path.join(root_dir, 'csrc')
+    sources = []
+    for ext_name in os.listdir(ext_root_dir):
+        ext_dir = os.path.join(ext_root_dir, ext_name)
+        source = glob.glob(os.path.join(ext_dir, '*.cc'))
+        kwargs = dict()
+        if paddle.device.is_compiled_with_cuda():
+            source += glob.glob(os.path.join(ext_dir, '*.cu'))
+
+        if not source:
+            continue
+
+        sources += source
+
+    if paddle.device.is_compiled_with_cuda():
+        extension = CUDAExtension(
+            sources, extra_compile_args={'cxx': ['-DPADDLE_WITH_CUDA']})
+    else:
+        extension = CppExtension(sources)
+
+    return extension
+
+
+# Build/install the 'ext_op' package only when run as a script.
+if __name__ == "__main__":
+    setup(name='ext_op', ext_modules=get_extensions())

+ 149 - 0
paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py

@@ -0,0 +1,149 @@
+import numpy as np
+import sys
+import time
+from shapely.geometry import Polygon
+import paddle
+import unittest
+
+from ext_op import matched_rbox_iou
+
+
+def rbox2poly_single(rrect, get_best_begin_point=False):
+    """
+    rrect:[x_ctr,y_ctr,w,h,angle]
+    to
+    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
+    """
+    x_ctr, y_ctr, width, height, angle = rrect[:5]
+    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
+    # rect 2x4
+    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
+    R = np.array([[np.cos(angle), -np.sin(angle)],
+                  [np.sin(angle), np.cos(angle)]])
+    # poly
+    poly = R.dot(rect)
+    x0, x1, x2, x3 = poly[0, :4] + x_ctr
+    y0, y1, y2, y3 = poly[1, :4] + y_ctr
+    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
+    return poly
+
+
+def intersection(g, p):
+    """
+    Intersection.
+    """
+
+    g = g[:8].reshape((4, 2))
+    p = p[:8].reshape((4, 2))
+
+    a = g
+    b = p
+
+    use_filter = True
+    if use_filter:
+        # step1:
+        inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
+        inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
+        inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
+        inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
+        if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
+            return 0.
+        x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
+        x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
+        y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
+        y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
+        if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
+            return 0.
+
+    g = Polygon(g)
+    p = Polygon(p)
+    if not g.is_valid or not p.is_valid:
+        return 0
+
+    inter = Polygon(g).intersection(Polygon(p)).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    else:
+        return inter / union
+
+
+def matched_rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
+    """
+
+    Args:
+        anchors: [M, 5]  x1,y1,x2,y2,angle
+        gt_bboxes: [M, 5]  x1,y1,x2,y2,angle
+
+    Returns:
+        macthed_iou: [M]
+    """
+    assert anchors.shape[1] == 5
+    assert gt_bboxes.shape[1] == 5
+
+    gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
+    anchors_ploy = [rbox2poly_single(e) for e in anchors]
+
+    num = len(anchors_ploy)
+    iou = np.zeros((num, ), dtype=np.float64)
+
+    start_time = time.time()
+    for i in range(num):
+        try:
+            iou[i] = intersection(gt_bboxes_ploy[i], anchors_ploy[i])
+        except Exception as e:
+            print('cur gt_bboxes_ploy[i]', gt_bboxes_ploy[i], 'anchors_ploy[j]',
+                  anchors_ploy[i], e)
+    return iou
+
+
+def gen_sample(n):
+    rbox = np.random.rand(n, 5)
+    rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
+    rbox[:, 4] = rbox[:, 4] - 0.5
+    return rbox
+
+
+class MatchedRBoxIoUTest(unittest.TestCase):
+    def setUp(self):
+        self.initTestCase()
+        self.rbox1 = gen_sample(self.n)
+        self.rbox2 = gen_sample(self.n)
+
+    def initTestCase(self):
+        self.n = 1000
+
+    def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
+        self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
+
+    def get_places(self):
+        places = [paddle.CPUPlace()]
+        if paddle.device.is_compiled_with_cuda():
+            places.append(paddle.CUDAPlace(0))
+
+        return places
+
+    def check_output(self, place):
+        paddle.disable_static()
+        pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
+        pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
+        actual_t = matched_rbox_iou(pd_rbox1, pd_rbox2).numpy()
+        poly_rbox1 = self.rbox1
+        poly_rbox2 = self.rbox2
+        poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
+        poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
+        expect_t = matched_rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
+        self.assertAllClose(
+            actual_t,
+            expect_t,
+            msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
+                str(place), str(expect_t), str(actual_t)))
+
+    def test_output(self):
+        places = self.get_places()
+        for place in places:
+            self.check_output(place)
+
+
+# Run the test cases when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

+ 151 - 0
paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py

@@ -0,0 +1,151 @@
+import numpy as np
+import sys
+import time
+from shapely.geometry import Polygon
+import paddle
+import unittest
+
+from ext_op import rbox_iou
+
+
+def rbox2poly_single(rrect, get_best_begin_point=False):
+    """
+    rrect:[x_ctr,y_ctr,w,h,angle]
+    to
+    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
+    """
+    x_ctr, y_ctr, width, height, angle = rrect[:5]
+    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
+    # rect 2x4
+    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
+    R = np.array([[np.cos(angle), -np.sin(angle)],
+                  [np.sin(angle), np.cos(angle)]])
+    # poly
+    poly = R.dot(rect)
+    x0, x1, x2, x3 = poly[0, :4] + x_ctr
+    y0, y1, y2, y3 = poly[1, :4] + y_ctr
+    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
+    return poly
+
+
+def intersection(g, p):
+    """
+    Intersection.
+    """
+
+    g = g[:8].reshape((4, 2))
+    p = p[:8].reshape((4, 2))
+
+    a = g
+    b = p
+
+    use_filter = True
+    if use_filter:
+        # step1:
+        inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
+        inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
+        inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
+        inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
+        if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
+            return 0.
+        x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
+        x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
+        y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
+        y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
+        if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
+            return 0.
+
+    g = Polygon(g)
+    p = Polygon(p)
+    if not g.is_valid or not p.is_valid:
+        return 0
+
+    inter = Polygon(g).intersection(Polygon(p)).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    else:
+        return inter / union
+
+
+def rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
+    """
+
+    Args:
+        anchors: [NA, 5]  x1,y1,x2,y2,angle
+        gt_bboxes: [M, 5]  x1,y1,x2,y2,angle
+
+    Returns:
+        iou: [NA, M]
+    """
+    assert anchors.shape[1] == 5
+    assert gt_bboxes.shape[1] == 5
+
+    gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
+    anchors_ploy = [rbox2poly_single(e) for e in anchors]
+
+    num_gt, num_anchors = len(gt_bboxes_ploy), len(anchors_ploy)
+    iou = np.zeros((num_anchors, num_gt), dtype=np.float64)
+
+    start_time = time.time()
+    for i in range(num_anchors):
+        for j in range(num_gt):
+            try:
+                iou[i, j] = intersection(anchors_ploy[i], gt_bboxes_ploy[j])
+            except Exception as e:
+                print('cur anchors_ploy[i]', anchors_ploy[i],
+                      'gt_bboxes_ploy[j]', gt_bboxes_ploy[j], e)
+    return iou
+
+
+def gen_sample(n):
+    rbox = np.random.rand(n, 5)
+    rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
+    rbox[:, 4] = rbox[:, 4] - 0.5
+    return rbox
+
+
+class RBoxIoUTest(unittest.TestCase):
+    def setUp(self):
+        self.initTestCase()
+        self.rbox1 = gen_sample(self.n)
+        self.rbox2 = gen_sample(self.m)
+
+    def initTestCase(self):
+        self.n = 13000
+        self.m = 7
+
+    def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
+        self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
+
+    def get_places(self):
+        places = [paddle.CPUPlace()]
+        if paddle.device.is_compiled_with_cuda():
+            places.append(paddle.CUDAPlace(0))
+
+        return places
+
+    def check_output(self, place):
+        paddle.disable_static()
+        pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
+        pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
+        actual_t = rbox_iou(pd_rbox1, pd_rbox2).numpy()
+        poly_rbox1 = self.rbox1
+        poly_rbox2 = self.rbox2
+        poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
+        poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
+        expect_t = rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
+        self.assertAllClose(
+            actual_t,
+            expect_t,
+            msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
+                str(place), str(expect_t), str(actual_t)))
+
+    def test_output(self):
+        places = self.get_places()
+        for place in places:
+            self.check_output(place)
+
+
+# Run the test cases when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

+ 1 - 1
paddlers/models/ppdet/metrics/__init__.py

@@ -26,4 +26,4 @@ __all__ = metrics.__all__ + mot_metrics.__all__
 
 
 from . import mcmot_metrics
 from . import mcmot_metrics
 from .mcmot_metrics import *
 from .mcmot_metrics import *
-__all__ = metrics.__all__ + mcmot_metrics.__all__
+__all__ = metrics.__all__ + mcmot_metrics.__all__

+ 12 - 12
paddlers/models/ppdet/metrics/coco_utils.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 11 - 1
paddlers/models/ppdet/metrics/json_results.py

@@ -65,6 +65,14 @@ def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
     return det_res
     return det_res
 
 
 
 
+def strip_mask(mask):
+    row = mask[0, 0, :]
+    col = mask[0, :, 0]
+    im_h = len(col) - np.count_nonzero(col == -1)
+    im_w = len(row) - np.count_nonzero(row == -1)
+    return mask[:, :im_h, :im_w]
+
+
 def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
 def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
     import pycocotools.mask as mask_util
     import pycocotools.mask as mask_util
     seg_res = []
     seg_res = []
@@ -72,8 +80,10 @@ def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
     for i in range(len(mask_nums)):
     for i in range(len(mask_nums)):
         cur_image_id = int(image_id[i][0])
         cur_image_id = int(image_id[i][0])
         det_nums = mask_nums[i]
         det_nums = mask_nums[i]
+        mask_i = masks[k:k + det_nums]
+        mask_i = strip_mask(mask_i)
         for j in range(det_nums):
         for j in range(det_nums):
-            mask = masks[k].astype(np.uint8)
+            mask = mask_i[j].astype(np.uint8)
             score = float(bboxes[k][1])
             score = float(bboxes[k][1])
             label = int(bboxes[k][0])
             label = int(bboxes[k][0])
             k = k + 1
             k = k + 1

+ 28 - 19
paddlers/models/ppdet/metrics/keypoint_metrics.py

@@ -1,21 +1,22 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import os
 import os
 import json
 import json
 from collections import defaultdict, OrderedDict
 from collections import defaultdict, OrderedDict
 import numpy as np
 import numpy as np
+import paddle
 from pycocotools.coco import COCO
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 from pycocotools.cocoeval import COCOeval
 from ..modeling.keypoint_utils import oks_nms
 from ..modeling.keypoint_utils import oks_nms
@@ -70,15 +71,23 @@ class KeyPointTopDownCOCOEval(object):
         self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
         self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
                                   3] = kpts[:, :, 0:3]
                                   3] = kpts[:, :, 0:3]
         self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
         self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
-            'center'].numpy()[:, 0:2]
+            'center'].numpy()[:, 0:2] if isinstance(
+                inputs['center'], paddle.Tensor) else inputs['center'][:, 0:2]
         self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
         self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
-            'scale'].numpy()[:, 0:2]
+            'scale'].numpy()[:, 0:2] if isinstance(
+                inputs['scale'], paddle.Tensor) else inputs['scale'][:, 0:2]
         self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
         self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
-            inputs['scale'].numpy() * 200, 1)
-        self.results['all_boxes'][self.idx:self.idx + num_images,
-                                  5] = np.squeeze(inputs['score'].numpy())
-        self.results['image_path'].extend(inputs['im_id'].numpy())
-
+            inputs['scale'].numpy() * 200,
+            1) if isinstance(inputs['scale'], paddle.Tensor) else np.prod(
+                inputs['scale'] * 200, 1)
+        self.results['all_boxes'][
+            self.idx:self.idx + num_images,
+            5] = np.squeeze(inputs['score'].numpy()) if isinstance(
+                inputs['score'], paddle.Tensor) else np.squeeze(inputs['score'])
+        if isinstance(inputs['im_id'], paddle.Tensor):
+            self.results['image_path'].extend(inputs['im_id'].numpy())
+        else:
+            self.results['image_path'].extend(inputs['im_id'])
         self.idx += num_images
         self.idx += num_images
 
 
     def _write_coco_keypoint_results(self, keypoints):
     def _write_coco_keypoint_results(self, keypoints):

+ 14 - 22
paddlers/models/ppdet/metrics/map_utils.py

@@ -22,7 +22,7 @@ import sys
 import numpy as np
 import numpy as np
 import itertools
 import itertools
 import paddle
 import paddle
-from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
 
 
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
@@ -91,15 +91,13 @@ def jaccard_overlap(pred, gt, is_bbox_normalized=False):
     return overlap
     return overlap
 
 
 
 
-def calc_rbox_iou(pred, gt_rbox):
+def calc_rbox_iou(pred, gt_poly):
     """
     """
     calc iou between rotated bbox
     calc iou between rotated bbox
     """
     """
     # calc iou of bounding box for speedup
     # calc iou of bounding box for speedup
-    pred = np.array(pred, np.float32).reshape(-1, 8)
-    pred = pred.reshape(-1, 2)
-    gt_poly = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0]
-    gt_poly = gt_poly.reshape(-1, 2)
+    pred = np.array(pred, np.float32).reshape(-1, 2)
+    gt_poly = np.array(gt_poly, np.float32).reshape(-1, 2)
     pred_rect = [
     pred_rect = [
         np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
         np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
         np.max(pred[:, 1])
         np.max(pred[:, 1])
@@ -114,20 +112,15 @@ def calc_rbox_iou(pred, gt_rbox):
         return iou
         return iou
 
 
     # calc rbox iou
     # calc rbox iou
-    pred = pred.reshape(-1, 8)
-
-    pred = np.array(pred, np.float32).reshape(-1, 8)
-    pred_rbox = poly2rbox(pred)
-    pred_rbox = pred_rbox.reshape(-1, 5)
-    pred_rbox = pred_rbox.reshape(-1, 5)
+    pred_rbox = poly2rbox_np(pred.reshape(-1, 8)).reshape(-1, 5)
+    gt_rbox = poly2rbox_np(gt_poly.reshape(-1, 8)).reshape(-1, 5)
     try:
     try:
-        from rbox_iou_ops import rbox_iou
+        from ext_op import rbox_iou
     except Exception as e:
     except Exception as e:
-        print("import custom_ops error, try install rbox_iou_ops " \
+        print("import custom_ops error, try install ext_op " \
                   "following ppdet/ext_op/README.md", e)
                   "following ppdet/ext_op/README.md", e)
         sys.stdout.flush()
         sys.stdout.flush()
         sys.exit(-1)
         sys.exit(-1)
-    gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
     pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
     pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
     pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
     pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
     iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
     iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
@@ -138,8 +131,7 @@ def calc_rbox_iou(pred, gt_rbox):
 def prune_zero_padding(gt_box, gt_label, difficult=None):
 def prune_zero_padding(gt_box, gt_label, difficult=None):
     valid_cnt = 0
     valid_cnt = 0
     for i in range(len(gt_box)):
     for i in range(len(gt_box)):
-        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
-                gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
+        if (gt_box[i] == 0).all():
             break
             break
         valid_cnt += 1
         valid_cnt += 1
     return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
     return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
@@ -154,8 +146,8 @@ class DetectionMAP(object):
     Args:
     Args:
         class_num (int): The class number.
         class_num (int): The class number.
         overlap_thresh (float): The threshold of overlap
         overlap_thresh (float): The threshold of overlap
-            ratio between prediction bounding box and
-            ground truth bounding box for deciding
+            ratio between prediction bounding box and 
+            ground truth bounding box for deciding 
             true/false positive. Default 0.5.
             true/false positive. Default 0.5.
         map_type (str): Calculation method of mean average
         map_type (str): Calculation method of mean average
             precision, currently support '11point' and
             precision, currently support '11point' and
@@ -212,7 +204,7 @@ class DetectionMAP(object):
             max_overlap = -1.0
             max_overlap = -1.0
             for i, gl in enumerate(gt_label):
             for i, gl in enumerate(gt_label):
                 if int(gl) == int(l):
                 if int(gl) == int(l):
-                    if len(gt_box[i]) == 5:
+                    if len(gt_box[i]) == 8:
                         overlap = calc_rbox_iou(pred, gt_box[i])
                         overlap = calc_rbox_iou(pred, gt_box[i])
                     else:
                     else:
                         overlap = jaccard_overlap(pred, gt_box[i],
                         overlap = jaccard_overlap(pred, gt_box[i],
@@ -363,7 +355,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
     """
     """
     Computes the average precision, given the recall and precision curves.
     Computes the average precision, given the recall and precision curves.
     Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
     Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
-
+    
     Args:
     Args:
         tp (list): True positives.
         tp (list): True positives.
         conf (list): Objectness value from 0-1.
         conf (list): Objectness value from 0-1.
@@ -417,7 +409,7 @@ def compute_ap(recall, precision):
     """
     """
     Computes the average precision, given the recall and precision curves.
     Computes the average precision, given the recall and precision curves.
     Code originally from https://github.com/rbgirshick/py-faster-rcnn.
     Code originally from https://github.com/rbgirshick/py-faster-rcnn.
-
+    
     Args:
     Args:
         recall (list): The recall curve.
         recall (list): The recall curve.
         precision (list): The precision curve.
         precision (list): The precision curve.

+ 35 - 29
paddlers/models/ppdet/metrics/mcmot_metrics.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -21,18 +21,21 @@ import copy
 import sys
 import sys
 import math
 import math
 from collections import defaultdict
 from collections import defaultdict
-from motmetrics.math_util import quiet_divide
 
 
 import numpy as np
 import numpy as np
 import pandas as pd
 import pandas as pd
 
 
-import paddle
-import paddle.nn.functional as F
 from .metrics import Metric
 from .metrics import Metric
-import motmetrics as mm
-import openpyxl
-metrics = mm.metrics.motchallenge_metrics
-mh = mm.metrics.create()
+try:
+    import motmetrics as mm
+    from motmetrics.math_util import quiet_divide
+    metrics = mm.metrics.motchallenge_metrics
+    mh = mm.metrics.create()
+except:
+    print(
+        'Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+    )
+    pass
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
 
 
@@ -78,7 +81,7 @@ NAME_MAP = {
 
 
 def parse_accs_metrics(seq_acc, index_name, verbose=False):
 def parse_accs_metrics(seq_acc, index_name, verbose=False):
     """
     """
-    Parse the evaluation indicators of multiple MOTAccumulator
+    Parse the evaluation indicators of multiple MOTAccumulator 
     """
     """
     mh = mm.metrics.create()
     mh = mm.metrics.create()
     summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
     summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
@@ -302,24 +305,30 @@ class MCMOTEvaluator(object):
         self.num_classes = num_classes
         self.num_classes = num_classes
 
 
         self.load_annotations()
         self.load_annotations()
+        try:
+            import motmetrics as mm
+            mm.lap.default_solver = 'lap'
+        except Exception as e:
+            raise RuntimeError(
+                'Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+            )
         self.reset_accumulator()
         self.reset_accumulator()
 
 
         self.class_accs = []
         self.class_accs = []
 
 
     def load_annotations(self):
     def load_annotations(self):
         assert self.data_type == 'mcmot'
         assert self.data_type == 'mcmot'
-        self.gt_filename = os.path.join(self.data_root, '../', '../',
-                                        'sequences',
+        self.gt_filename = os.path.join(self.data_root, '../', 'sequences',
                                         '{}.txt'.format(self.seq_name))
                                         '{}.txt'.format(self.seq_name))
+        if not os.path.exists(self.gt_filename):
+            logger.warning(
+                "gt_filename '{}' of MCMOTEvaluator is not exist, so the MOTA will be -INF."
+            )
 
 
     def reset_accumulator(self):
     def reset_accumulator(self):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
         self.acc = mm.MOTAccumulator(auto_id=True)
         self.acc = mm.MOTAccumulator(auto_id=True)
 
 
     def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False, union=False):
     def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False, union=False):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
         if union:
         if union:
             trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
             trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
             gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
             gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
@@ -393,9 +402,6 @@ class MCMOTEvaluator(object):
                     names,
                     names,
                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                              'precision', 'recall')):
                              'precision', 'recall')):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
-
         names = copy.deepcopy(names)
         names = copy.deepcopy(names)
         if metrics is None:
         if metrics is None:
             metrics = mm.metrics.motchallenge_metrics
             metrics = mm.metrics.motchallenge_metrics

+ 141 - 70
paddlers/models/ppdet/metrics/metrics.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -22,11 +22,14 @@ import json
 import paddle
 import paddle
 import numpy as np
 import numpy as np
 import typing
 import typing
+from collections import defaultdict
+from pathlib import Path
 
 
 from .map_utils import prune_zero_padding, DetectionMAP
 from .map_utils import prune_zero_padding, DetectionMAP
 from .coco_utils import get_infer_results, cocoapi_eval
 from .coco_utils import get_infer_results, cocoapi_eval
 from .widerface_utils import face_eval_run
 from .widerface_utils import face_eval_run
 from paddlers.models.ppdet.data.source.category import get_categories
 from paddlers.models.ppdet.data.source.category import get_categories
+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
 
 
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
@@ -69,8 +72,6 @@ class Metric(paddle.metric.Metric):
 
 
 class COCOMetric(Metric):
 class COCOMetric(Metric):
     def __init__(self, anno_file, **kwargs):
     def __init__(self, anno_file, **kwargs):
-        assert os.path.isfile(anno_file), \
-                "anno_file {} not a file".format(anno_file)
         self.anno_file = anno_file
         self.anno_file = anno_file
         self.clsid2catid = kwargs.get('clsid2catid', None)
         self.clsid2catid = kwargs.get('clsid2catid', None)
         if self.clsid2catid is None:
         if self.clsid2catid is None:
@@ -81,6 +82,14 @@ class COCOMetric(Metric):
         self.bias = kwargs.get('bias', 0)
         self.bias = kwargs.get('bias', 0)
         self.save_prediction_only = kwargs.get('save_prediction_only', False)
         self.save_prediction_only = kwargs.get('save_prediction_only', False)
         self.iou_type = kwargs.get('IouType', 'bbox')
         self.iou_type = kwargs.get('IouType', 'bbox')
+
+        if not self.save_prediction_only:
+            assert os.path.isfile(anno_file), \
+                    "anno_file {} not a file".format(anno_file)
+
+        if self.output_eval is not None:
+            Path(self.output_eval).mkdir(exist_ok=True)
+
         self.reset()
         self.reset()
 
 
     def reset(self):
     def reset(self):
@@ -218,7 +227,9 @@ class VOCMetric(Metric):
                  map_type='11point',
                  map_type='11point',
                  is_bbox_normalized=False,
                  is_bbox_normalized=False,
                  evaluate_difficult=False,
                  evaluate_difficult=False,
-                 classwise=False):
+                 classwise=False,
+                 output_eval=None,
+                 save_prediction_only=False):
         assert os.path.isfile(label_list), \
         assert os.path.isfile(label_list), \
                 "label_list {} not a file".format(label_list)
                 "label_list {} not a file".format(label_list)
         self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
         self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
@@ -226,6 +237,8 @@ class VOCMetric(Metric):
         self.overlap_thresh = overlap_thresh
         self.overlap_thresh = overlap_thresh
         self.map_type = map_type
         self.map_type = map_type
         self.evaluate_difficult = evaluate_difficult
         self.evaluate_difficult = evaluate_difficult
+        self.output_eval = output_eval
+        self.save_prediction_only = save_prediction_only
         self.detection_map = DetectionMAP(
         self.detection_map = DetectionMAP(
             class_num=class_num,
             class_num=class_num,
             overlap_thresh=overlap_thresh,
             overlap_thresh=overlap_thresh,
@@ -238,34 +251,52 @@ class VOCMetric(Metric):
         self.reset()
         self.reset()
 
 
     def reset(self):
     def reset(self):
+        self.results = {'bbox': [], 'score': [], 'label': []}
         self.detection_map.reset()
         self.detection_map.reset()
 
 
     def update(self, inputs, outputs):
     def update(self, inputs, outputs):
-        bbox_np = outputs['bbox'].numpy()
+        bbox_np = outputs['bbox'].numpy() if isinstance(
+            outputs['bbox'], paddle.Tensor) else outputs['bbox']
         bboxes = bbox_np[:, 2:]
         bboxes = bbox_np[:, 2:]
         scores = bbox_np[:, 1]
         scores = bbox_np[:, 1]
         labels = bbox_np[:, 0]
         labels = bbox_np[:, 0]
-        bbox_lengths = outputs['bbox_num'].numpy()
+        bbox_lengths = outputs['bbox_num'].numpy() if isinstance(
+            outputs['bbox_num'], paddle.Tensor) else outputs['bbox_num']
+
+        self.results['bbox'].append(bboxes.tolist())
+        self.results['score'].append(scores.tolist())
+        self.results['label'].append(labels.tolist())
 
 
         if bboxes.shape == (1, 1) or bboxes is None:
         if bboxes.shape == (1, 1) or bboxes is None:
             return
             return
+        if self.save_prediction_only:
+            return
+
         gt_boxes = inputs['gt_bbox']
         gt_boxes = inputs['gt_bbox']
         gt_labels = inputs['gt_class']
         gt_labels = inputs['gt_class']
         difficults = inputs['difficult'] if not self.evaluate_difficult \
         difficults = inputs['difficult'] if not self.evaluate_difficult \
                             else None
                             else None
 
 
-        scale_factor = inputs['scale_factor'].numpy(
-        ) if 'scale_factor' in inputs else np.ones(
-            (gt_boxes.shape[0], 2)).astype('float32')
+        if 'scale_factor' in inputs:
+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
+                inputs['scale_factor'],
+                paddle.Tensor) else inputs['scale_factor']
+        else:
+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
 
 
         bbox_idx = 0
         bbox_idx = 0
         for i in range(len(gt_boxes)):
         for i in range(len(gt_boxes)):
-            gt_box = gt_boxes[i].numpy()
+            gt_box = gt_boxes[i].numpy() if isinstance(
+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
             h, w = scale_factor[i]
             h, w = scale_factor[i]
             gt_box = gt_box / np.array([w, h, w, h])
             gt_box = gt_box / np.array([w, h, w, h])
-            gt_label = gt_labels[i].numpy()
-            difficult = None if difficults is None \
-                            else difficults[i].numpy()
+            gt_label = gt_labels[i].numpy() if isinstance(
+                gt_labels[i], paddle.Tensor) else gt_labels[i]
+            if difficults is not None:
+                difficult = difficults[i].numpy() if isinstance(
+                    difficults[i], paddle.Tensor) else difficults[i]
+            else:
+                difficult = None
             bbox_num = bbox_lengths[i]
             bbox_num = bbox_lengths[i]
             bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
             bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
             score = scores[bbox_idx:bbox_idx + bbox_num]
             score = scores[bbox_idx:bbox_idx + bbox_num]
@@ -277,6 +308,15 @@ class VOCMetric(Metric):
             bbox_idx += bbox_num
             bbox_idx += bbox_num
 
 
     def accumulate(self):
     def accumulate(self):
+        output = "bbox.json"
+        if self.output_eval:
+            output = os.path.join(self.output_eval, output)
+            with open(output, 'w') as f:
+                json.dump(self.results, f)
+                logger.info('The bbox result is saved to bbox.json.')
+        if self.save_prediction_only:
+            return
+
         logger.info("Accumulating evaluatation results...")
         logger.info("Accumulating evaluatation results...")
         self.detection_map.accumulate()
         self.detection_map.accumulate()
 
 
@@ -309,25 +349,16 @@ class WiderFaceMetric(Metric):
 
 
 class RBoxMetric(Metric):
 class RBoxMetric(Metric):
     def __init__(self, anno_file, **kwargs):
     def __init__(self, anno_file, **kwargs):
-        assert os.path.isfile(anno_file), \
-                "anno_file {} not a file".format(anno_file)
-        assert os.path.exists(anno_file), "anno_file {} not exists".format(
-            anno_file)
         self.anno_file = anno_file
         self.anno_file = anno_file
-        self.gt_anno = json.load(open(self.anno_file))
-        cats = self.gt_anno['categories']
-        self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
-        self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
-        self.catid2name = {cat['id']: cat['name'] for cat in cats}
+        self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
+        self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
         self.classwise = kwargs.get('classwise', False)
         self.classwise = kwargs.get('classwise', False)
         self.output_eval = kwargs.get('output_eval', None)
         self.output_eval = kwargs.get('output_eval', None)
-        # TODO: bias should be unified
-        self.bias = kwargs.get('bias', 0)
         self.save_prediction_only = kwargs.get('save_prediction_only', False)
         self.save_prediction_only = kwargs.get('save_prediction_only', False)
-        self.iou_type = kwargs.get('IouType', 'bbox')
         self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
         self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
         self.map_type = kwargs.get('map_type', '11point')
         self.map_type = kwargs.get('map_type', '11point')
         self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
         self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
+        self.imid2path = kwargs.get('imid2path', None)
         class_num = len(self.catid2name)
         class_num = len(self.catid2name)
         self.detection_map = DetectionMAP(
         self.detection_map = DetectionMAP(
             class_num=class_num,
             class_num=class_num,
@@ -341,7 +372,7 @@ class RBoxMetric(Metric):
         self.reset()
         self.reset()
 
 
     def reset(self):
     def reset(self):
-        self.result_bbox = []
+        self.results = []
         self.detection_map.reset()
         self.detection_map.reset()
 
 
     def update(self, inputs, outputs):
     def update(self, inputs, outputs):
@@ -351,43 +382,83 @@ class RBoxMetric(Metric):
             outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
             outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
 
 
         im_id = inputs['im_id']
         im_id = inputs['im_id']
-        outs['im_id'] = im_id.numpy() if isinstance(im_id,
-                                                    paddle.Tensor) else im_id
+        im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
+        outs['im_id'] = im_id
 
 
-        infer_results = get_infer_results(
-            outs, self.clsid2catid, bias=self.bias)
-        self.result_bbox += infer_results[
-            'bbox'] if 'bbox' in infer_results else []
-        bbox = [b['bbox'] for b in self.result_bbox]
-        score = [b['score'] for b in self.result_bbox]
-        label = [b['category_id'] for b in self.result_bbox]
-        label = [self.catid2clsid[e] for e in label]
-        gt_box = [
-            e['bbox'] for e in self.gt_anno['annotations']
-            if e['image_id'] == outs['im_id']
-        ]
-        gt_label = [
-            e['category_id'] for e in self.gt_anno['annotations']
-            if e['image_id'] == outs['im_id']
-        ]
-        gt_label = [self.catid2clsid[e] for e in gt_label]
-        self.detection_map.update(bbox, score, label, gt_box, gt_label)
+        infer_results = get_infer_results(outs, self.clsid2catid)
+        infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
+        self.results += infer_results
+        if self.save_prediction_only:
+            return
 
 
-    def accumulate(self):
-        if len(self.result_bbox) > 0:
-            output = "bbox.json"
-            if self.output_eval:
-                output = os.path.join(self.output_eval, output)
+        gt_boxes = inputs['gt_poly']
+        gt_labels = inputs['gt_class']
+
+        if 'scale_factor' in inputs:
+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
+                inputs['scale_factor'],
+                paddle.Tensor) else inputs['scale_factor']
+        else:
+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
+
+        for i in range(len(gt_boxes)):
+            gt_box = gt_boxes[i].numpy() if isinstance(
+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
+            h, w = scale_factor[i]
+            gt_box = gt_box / np.array([w, h, w, h, w, h, w, h])
+            gt_label = gt_labels[i].numpy() if isinstance(
+                gt_labels[i], paddle.Tensor) else gt_labels[i]
+            gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
+            bbox = [
+                res['bbox'] for res in infer_results
+                if int(res['image_id']) == int(im_id[i])
+            ]
+            score = [
+                res['score'] for res in infer_results
+                if int(res['image_id']) == int(im_id[i])
+            ]
+            label = [
+                self.catid2clsid[int(res['category_id'])]
+                for res in infer_results
+                if int(res['image_id']) == int(im_id[i])
+            ]
+            self.detection_map.update(bbox, score, label, gt_box, gt_label)
+
+    def save_results(self, results, output_dir, imid2path):
+        if imid2path:
+            data_dicts = defaultdict(list)
+            for result in results:
+                image_id = result['image_id']
+                data_dicts[image_id].append(result)
+
+            for image_id, image_path in imid2path.items():
+                basename = os.path.splitext(os.path.split(image_path)[-1])[0]
+                output = os.path.join(output_dir, "{}.txt".format(basename))
+                dets = data_dicts.get(image_id, [])
+                with open(output, 'w') as f:
+                    for det in dets:
+                        catid, bbox, score = det['category_id'], det[
+                            'bbox'], det['score']
+                        bbox_pred = '{} {} '.format(self.catid2name[catid],
+                                                    score) + ' '.join(
+                                                        [str(e) for e in bbox])
+                        f.write(bbox_pred + '\n')
+
+            logger.info('The bbox result is saved to {}.'.format(output_dir))
+        else:
+            output = os.path.join(output_dir, "bbox.json")
             with open(output, 'w') as f:
             with open(output, 'w') as f:
-                json.dump(self.result_bbox, f)
-                logger.info('The bbox result is saved to bbox.json.')
+                json.dump(results, f)
 
 
-            if self.save_prediction_only:
-                logger.info('The bbox result is saved to {} and do not '
-                            'evaluate the mAP.'.format(output))
-            else:
-                logger.info("Accumulating evaluatation results...")
-                self.detection_map.accumulate()
+            logger.info('The bbox result is saved to {}.'.format(output))
+
+    def accumulate(self):
+        if self.output_eval:
+            self.save_results(self.results, self.output_eval, self.imid2path)
+
+        if not self.save_prediction_only:
+            logger.info("Accumulating evaluatation results...")
+            self.detection_map.accumulate()
 
 
     def log(self):
     def log(self):
         map_stat = 100. * self.detection_map.get_map()
         map_stat = 100. * self.detection_map.get_map()

+ 43 - 29
paddlers/models/ppdet/metrics/mot_metrics.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -22,13 +22,21 @@ import sys
 import math
 import math
 from collections import defaultdict
 from collections import defaultdict
 import numpy as np
 import numpy as np
-import paddle
-import paddle.nn.functional as F
+
 from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
 from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
 from .map_utils import ap_per_class
 from .map_utils import ap_per_class
 from .metrics import Metric
 from .metrics import Metric
 from .munkres import Munkres
 from .munkres import Munkres
 
 
+try:
+    import motmetrics as mm
+    mm.lap.default_solver = 'lap'
+except:
+    print(
+        'Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+    )
+    pass
+
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
 
 
@@ -36,8 +44,13 @@ __all__ = ['MOTEvaluator', 'MOTMetric', 'JDEDetMetric', 'KITTIMOTMetric']
 
 
 
 
 def read_mot_results(filename, is_gt=False, is_ignore=False):
 def read_mot_results(filename, is_gt=False, is_ignore=False):
-    valid_labels = {1}
-    ignore_labels = {2, 7, 8, 12}  # only in motchallenge datasets like 'MOT16'
+    valid_label = [1]
+    ignore_labels = [2, 7, 8, 12]  # only in motchallenge datasets like 'MOT16'
+    if is_gt:
+        logger.info(
+            "In MOT16/17 dataset the valid_label of ground truth is '{}', "
+            "in other dataset it should be '0' for single classs MOT.".format(
+                valid_label[0]))
     results_dict = dict()
     results_dict = dict()
     if os.path.isfile(filename):
     if os.path.isfile(filename):
         with open(filename, 'r') as f:
         with open(filename, 'r') as f:
@@ -50,12 +63,10 @@ def read_mot_results(filename, is_gt=False, is_ignore=False):
                     continue
                     continue
                 results_dict.setdefault(fid, list())
                 results_dict.setdefault(fid, list())
 
 
-                box_size = float(linelist[4]) * float(linelist[5])
-
                 if is_gt:
                 if is_gt:
                     label = int(float(linelist[7]))
                     label = int(float(linelist[7]))
                     mark = int(float(linelist[6]))
                     mark = int(float(linelist[6]))
-                    if mark == 0 or label not in valid_labels:
+                    if mark == 0 or label not in valid_label:
                         continue
                         continue
                     score = 1
                     score = 1
                 elif is_ignore:
                 elif is_ignore:
@@ -112,24 +123,31 @@ class MOTEvaluator(object):
         self.data_type = data_type
         self.data_type = data_type
 
 
         self.load_annotations()
         self.load_annotations()
+        try:
+            import motmetrics as mm
+            mm.lap.default_solver = 'lap'
+        except Exception as e:
+            raise RuntimeError(
+                'Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+            )
         self.reset_accumulator()
         self.reset_accumulator()
 
 
     def load_annotations(self):
     def load_annotations(self):
         assert self.data_type == 'mot'
         assert self.data_type == 'mot'
         gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
         gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
                                    'gt.txt')
                                    'gt.txt')
+        if not os.path.exists(gt_filename):
+            logger.warning(
+                "gt_filename '{}' of MOTEvaluator is not exist, so the MOTA will be -INF."
+            )
         self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
         self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
         self.gt_ignore_frame_dict = read_mot_results(
         self.gt_ignore_frame_dict = read_mot_results(
             gt_filename, is_ignore=True)
             gt_filename, is_ignore=True)
 
 
     def reset_accumulator(self):
     def reset_accumulator(self):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
         self.acc = mm.MOTAccumulator(auto_id=True)
         self.acc = mm.MOTAccumulator(auto_id=True)
 
 
     def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
     def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
         # results
         # results
         trk_tlwhs = np.copy(trk_tlwhs)
         trk_tlwhs = np.copy(trk_tlwhs)
         trk_ids = np.copy(trk_ids)
         trk_ids = np.copy(trk_ids)
@@ -187,8 +205,6 @@ class MOTEvaluator(object):
                     names,
                     names,
                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
                              'precision', 'recall')):
                              'precision', 'recall')):
-        import motmetrics as mm
-        mm.lap.default_solver = 'lap'
         names = copy.deepcopy(names)
         names = copy.deepcopy(names)
         if metrics is None:
         if metrics is None:
             metrics = mm.metrics.motchallenge_metrics
             metrics = mm.metrics.motchallenge_metrics
@@ -225,8 +241,6 @@ class MOTMetric(Metric):
         self.result_root = result_root
         self.result_root = result_root
 
 
     def accumulate(self):
     def accumulate(self):
-        import motmetrics as mm
-        import openpyxl
         metrics = mm.metrics.motchallenge_metrics
         metrics = mm.metrics.motchallenge_metrics
         mh = mm.metrics.create()
         mh = mm.metrics.create()
         summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
         summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
@@ -422,7 +436,7 @@ class KITTIEvaluation(object):
         self.ifn = 0  # number of ignored false negatives
         self.ifn = 0  # number of ignored false negatives
         self.ifns = []  # number of ignored false negatives PER SEQUENCE
         self.ifns = []  # number of ignored false negatives PER SEQUENCE
         self.fp = 0  # number of false positives
         self.fp = 0  # number of false positives
-        # a bit tricky, the number of ignored false negatives and ignored true positives
+        # a bit tricky, the number of ignored false negatives and ignored true positives 
         # is subtracted, but if both tracker detection and ground truth detection
         # is subtracted, but if both tracker detection and ground truth detection
         # are ignored this number is added again to avoid double counting
         # are ignored this number is added again to avoid double counting
         self.fps = []  # above PER SEQUENCE
         self.fps = []  # above PER SEQUENCE
@@ -551,7 +565,7 @@ class KITTIEvaluation(object):
                             "track ids are not unique for sequence %d: frame %d"
                             "track ids are not unique for sequence %d: frame %d"
                             % (seq, t_data.frame))
                             % (seq, t_data.frame))
                         logger.info(
                         logger.info(
-                            "track id %d occured at least twice for this frame"
+                            "track id %d occurred at least twice for this frame"
                             % t_data.track_id)
                             % t_data.track_id)
                         logger.info("Exiting...")
                         logger.info("Exiting...")
                         #continue # this allows to evaluate non-unique result files
                         #continue # this allows to evaluate non-unique result files

+ 12 - 12
paddlers/models/ppdet/metrics/munkres.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 """
 """
 This code is borrow from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
 This code is borrow from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py

+ 1 - 0
paddlers/models/ppdet/model_zoo/.gitignore

@@ -0,0 +1 @@
+MODEL_ZOO

+ 12 - 12
paddlers/models/ppdet/model_zoo/__init__.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from . import model_zoo
 from . import model_zoo

+ 12 - 12
paddlers/models/ppdet/model_zoo/model_zoo.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import os.path as osp
 import os.path as osp

+ 13 - 0
paddlers/models/ppdet/model_zoo/tests/__init__.py

@@ -0,0 +1,13 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 48 - 0
paddlers/models/ppdet/model_zoo/tests/test_get_model.py

@@ -0,0 +1,48 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import paddle
+import paddlers.models.ppdet as ppdet
+import unittest
+
+# NOTE: weights downloading costs time, we choose
+#       a small model for unittesting
+MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
+
+
+class TestGetConfigFile(unittest.TestCase):
+    def test_main(self):
+        try:
+            cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
+            assert os.path.isfile(cfg_file)
+        except:
+            self.assertTrue(False)
+
+
+class TestGetModel(unittest.TestCase):
+    def test_main(self):
+        try:
+            model = ppdet.model_zoo.get_model(MODEL_NAME)
+            assert isinstance(model, paddle.nn.Layer)
+        except:
+            self.assertTrue(False)
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 68 - 0
paddlers/models/ppdet/model_zoo/tests/test_list_model.py

@@ -0,0 +1,68 @@
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+import paddlers.models.ppdet as ppdet
+
+
+class TestListModel(unittest.TestCase):
+    def setUp(self):
+        self._filter = []
+
+    def test_main(self):
+        try:
+            ppdet.model_zoo.list_model(self._filter)
+            self.assertTrue(True)
+        except:
+            self.assertTrue(False)
+
+
+class TestListModelYOLO(TestListModel):
+    def setUp(self):
+        self._filter = ['yolo']
+
+
+class TestListModelRCNN(TestListModel):
+    def setUp(self):
+        self._filter = ['rcnn']
+
+
+class TestListModelSSD(TestListModel):
+    def setUp(self):
+        self._filter = ['ssd']
+
+
+class TestListModelMultiFilter(TestListModel):
+    def setUp(self):
+        self._filter = ['yolo', 'darknet']
+
+
+class TestListModelError(unittest.TestCase):
+    def setUp(self):
+        self._filter = ['xxx']
+
+    def test_main(self):
+        try:
+            ppdet.model_zoo.list_model(self._filter)
+            self.assertTrue(False)
+        except ValueError:
+            self.assertTrue(True)
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 14 - 12
paddlers/models/ppdet/modeling/__init__.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import warnings
 import warnings
@@ -29,6 +29,7 @@ from . import reid
 from . import mot
 from . import mot
 from . import transformers
 from . import transformers
 from . import assigners
 from . import assigners
+from . import rbox_utils
 
 
 from .ops import *
 from .ops import *
 from .backbones import *
 from .backbones import *
@@ -43,3 +44,4 @@ from .reid import *
 from .mot import *
 from .mot import *
 from .transformers import *
 from .transformers import *
 from .assigners import *
 from .assigners import *
+from .rbox_utils import *

+ 20 - 7
paddlers/models/ppdet/modeling/architectures/__init__.py

@@ -1,10 +1,17 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
 from . import meta_arch
 from . import meta_arch
 from . import faster_rcnn
 from . import faster_rcnn
 from . import mask_rcnn
 from . import mask_rcnn
@@ -26,6 +33,9 @@ from . import picodet
 from . import detr
 from . import detr
 from . import sparse_rcnn
 from . import sparse_rcnn
 from . import tood
 from . import tood
+from . import retinanet
+from . import bytetrack
+from . import yolox
 
 
 from .meta_arch import *
 from .meta_arch import *
 from .faster_rcnn import *
 from .faster_rcnn import *
@@ -49,3 +59,6 @@ from .picodet import *
 from .detr import *
 from .detr import *
 from .sparse_rcnn import *
 from .sparse_rcnn import *
 from .tood import *
 from .tood import *
+from .retinanet import *
+from .bytetrack import *
+from .yolox import *

+ 79 - 0
paddlers/models/ppdet/modeling/architectures/bytetrack.py

@@ -0,0 +1,79 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['ByteTrack']
+
+
+@register
+class ByteTrack(BaseArch):
+    """
+    ByteTrack network, see https://arxiv.org/abs/2110.06864
+
+    Args:
+        detector (object): detector model instance
+        reid (object): reid model instance, default None
+        tracker (object): tracker instance
+    """
+    __category__ = 'architecture'
+
+    def __init__(self, detector='YOLOX', reid=None, tracker='JDETracker'):
+        super(ByteTrack, self).__init__()
+        self.detector = detector
+        self.reid = reid
+        self.tracker = tracker
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        detector = create(cfg['detector'])
+
+        if cfg['reid'] != 'None':
+            reid = create(cfg['reid'])
+        else:
+            reid = None
+
+        tracker = create(cfg['tracker'])
+
+        return {
+            "detector": detector,
+            "reid": reid,
+            "tracker": tracker,
+        }
+
+    def _forward(self):
+        det_outs = self.detector(self.inputs)
+
+        if self.training:
+            return det_outs
+        else:
+            if self.reid is not None:
+                assert 'crops' in self.inputs
+                crops = self.inputs['crops']
+                pred_embs = self.reid(crops)
+            else:
+                pred_embs = None
+            det_outs['embeddings'] = pred_embs
+            return det_outs
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()

+ 16 - 16
paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -111,14 +111,14 @@ class CascadeRCNN(BaseArch):
             bbox, bbox_num = self.bbox_post_process(
             bbox, bbox_num = self.bbox_post_process(
                 preds, (refined_rois, rois_num), im_shape, scale_factor)
                 preds, (refined_rois, rois_num), im_shape, scale_factor)
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
-                                                        im_shape, scale_factor)
+            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+                bbox, bbox_num, im_shape, scale_factor)
             if not self.with_mask:
             if not self.with_mask:
                 return bbox_pred, bbox_num, None
                 return bbox_pred, bbox_num, None
             mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
             mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
             origin_shape = self.bbox_post_process.get_origin_shape()
             origin_shape = self.bbox_post_process.get_origin_shape()
-            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
-                                               bbox_num, origin_shape)
+            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
+                                               origin_shape)
             return bbox_pred, bbox_num, mask_pred
             return bbox_pred, bbox_num, mask_pred
 
 
     def get_loss(self, ):
     def get_loss(self, ):

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/centernet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 15 - 14
paddlers/models/ppdet/modeling/architectures/deepsort.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -62,8 +62,9 @@ class DeepSORT(BaseArch):
 
 
     def _forward(self):
     def _forward(self):
         crops = self.inputs['crops']
         crops = self.inputs['crops']
-        features = self.reid(crops)
-        return features
+        outs = {}
+        outs['embeddings'] = self.reid(crops)
+        return outs
 
 
     def get_pred(self):
     def get_pred(self):
         return self._forward()
         return self._forward()

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/fairmot.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 14 - 14
paddlers/models/ppdet/modeling/architectures/faster_rcnn.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -87,8 +87,8 @@ class FasterRCNN(BaseArch):
                                                     im_shape, scale_factor)
                                                     im_shape, scale_factor)
 
 
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
-                                                        im_shape, scale_factor)
+            bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+                bbox, bbox_num, im_shape, scale_factor)
             return bbox_pred, bbox_num
             return bbox_pred, bbox_num
 
 
     def get_loss(self, ):
     def get_loss(self, ):

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/fcos.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/gfl.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/jde.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 13 - 13
paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -153,7 +153,7 @@ class HrHRNetPostProcess(object):
         heat_thresh (float): value of topk below this threshhold will be ignored
         heat_thresh (float): value of topk below this threshhold will be ignored
         tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
         tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
 
 
-        inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
+        inputs(list[heatmap]): the output list of model, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
         original_height, original_width (float): the original image size
         original_height, original_width (float): the original image size
     '''
     '''
 
 

+ 4 - 4
paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at 
 #
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 #
 # Unless required by applicable law or agreed to in writing, software
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# See the License for the specific language governing permissions and 
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 16 - 16
paddlers/models/ppdet/modeling/architectures/mask_rcnn.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -112,11 +112,11 @@ class MaskRCNN(BaseArch):
                 body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
                 body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
 
 
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
-                                                        im_shape, scale_factor)
+            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
+                bbox, bbox_num, im_shape, scale_factor)
             origin_shape = self.bbox_post_process.get_origin_shape()
             origin_shape = self.bbox_post_process.get_origin_shape()
-            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
-                                               bbox_num, origin_shape)
+            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
+                                               origin_shape)
             return bbox_pred, bbox_num, mask_pred
             return bbox_pred, bbox_num, mask_pred
 
 
     def get_loss(self, ):
     def get_loss(self, ):

+ 19 - 27
paddlers/models/ppdet/modeling/architectures/meta_arch.py

@@ -22,22 +22,23 @@ class BaseArch(nn.Layer):
         self.fuse_norm = False
         self.fuse_norm = False
 
 
     def load_meanstd(self, cfg_transform):
     def load_meanstd(self, cfg_transform):
-        self.scale = 1.
-        self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
-            (1, 3, 1, 1))
-        self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
+        scale = 1.
+        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
         for item in cfg_transform:
         for item in cfg_transform:
             if 'NormalizeImage' in item:
             if 'NormalizeImage' in item:
-                self.mean = paddle.to_tensor(item['NormalizeImage'][
-                    'mean']).reshape((1, 3, 1, 1))
-                self.std = paddle.to_tensor(item['NormalizeImage'][
-                    'std']).reshape((1, 3, 1, 1))
+                mean = np.array(
+                    item['NormalizeImage']['mean'], dtype=np.float32)
+                std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
                 if item['NormalizeImage'].get('is_scale', True):
                 if item['NormalizeImage'].get('is_scale', True):
-                    self.scale = 1. / 255.
+                    scale = 1. / 255.
                 break
                 break
         if self.data_format == 'NHWC':
         if self.data_format == 'NHWC':
-            self.mean = self.mean.reshape(1, 1, 1, 3)
-            self.std = self.std.reshape(1, 1, 1, 3)
+            self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
+            self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
+        else:
+            self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
+            self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         if self.data_format == 'NHWC':
         if self.data_format == 'NHWC':
@@ -46,7 +47,7 @@ class BaseArch(nn.Layer):
 
 
         if self.fuse_norm:
         if self.fuse_norm:
             image = inputs['image']
             image = inputs['image']
-            self.inputs['image'] = (image * self.scale - self.mean) / self.std
+            self.inputs['image'] = image * self.scale + self.bias
             self.inputs['im_shape'] = inputs['im_shape']
             self.inputs['im_shape'] = inputs['im_shape']
             self.inputs['scale_factor'] = inputs['scale_factor']
             self.inputs['scale_factor'] = inputs['scale_factor']
         else:
         else:
@@ -63,10 +64,14 @@ class BaseArch(nn.Layer):
                 inputs_list.append(inputs)
                 inputs_list.append(inputs)
             else:
             else:
                 inputs_list.extend(inputs)
                 inputs_list.extend(inputs)
-
             outs = []
             outs = []
             for inp in inputs_list:
             for inp in inputs_list:
-                self.inputs = inp
+                if self.fuse_norm:
+                    self.inputs['image'] = inp['image'] * self.scale + self.bias
+                    self.inputs['im_shape'] = inp['im_shape']
+                    self.inputs['scale_factor'] = inp['scale_factor']
+                else:
+                    self.inputs = inp
                 outs.append(self.get_pred())
                 outs.append(self.get_pred())
 
 
             # multi-scale test
             # multi-scale test
@@ -124,16 +129,3 @@ class BaseArch(nn.Layer):
 
 
     def get_pred(self, ):
     def get_pred(self, ):
         raise NotImplementedError("Should implement get_pred method!")
         raise NotImplementedError("Should implement get_pred method!")
-
-    @classmethod
-    def convert_sync_batchnorm(cls, layer):
-        layer_output = layer
-        if getattr(layer, 'norm_type', None) == 'sync_bn':
-            layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
-        else:
-            for name, sublayer in layer.named_children():
-                layer_output.add_sublayer(name,
-                                          cls.convert_sync_batchnorm(sublayer))
-
-        del layer
-        return layer_output

+ 24 - 20
paddlers/models/ppdet/modeling/architectures/picodet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -41,7 +41,8 @@ class PicoDet(BaseArch):
         self.backbone = backbone
         self.backbone = backbone
         self.neck = neck
         self.neck = neck
         self.head = head
         self.head = head
-        self.deploy = False
+        self.export_post_process = True
+        self.export_nms = True
 
 
     @classmethod
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
     def from_config(cls, cfg, *args, **kwargs):
@@ -62,14 +63,13 @@ class PicoDet(BaseArch):
     def _forward(self):
     def _forward(self):
         body_feats = self.backbone(self.inputs)
         body_feats = self.backbone(self.inputs)
         fpn_feats = self.neck(body_feats)
         fpn_feats = self.neck(body_feats)
-        head_outs = self.head(fpn_feats, self.deploy)
-        if self.training or self.deploy:
+        head_outs = self.head(fpn_feats, self.export_post_process)
+        if self.training or not self.export_post_process:
             return head_outs, None
             return head_outs, None
         else:
         else:
-            im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']
             scale_factor = self.inputs['scale_factor']
-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
-                                                      scale_factor)
+            bboxes, bbox_num = self.head.post_process(
+                head_outs, scale_factor, export_nms=self.export_nms)
             return bboxes, bbox_num
             return bboxes, bbox_num
 
 
     def get_loss(self, ):
     def get_loss(self, ):
@@ -83,9 +83,13 @@ class PicoDet(BaseArch):
         return loss
         return loss
 
 
     def get_pred(self):
     def get_pred(self):
-        if self.deploy:
+        if not self.export_post_process:
             return {'picodet': self._forward()[0]}
             return {'picodet': self._forward()[0]}
-        else:
+        elif self.export_nms:
             bbox_pred, bbox_num = self._forward()
             bbox_pred, bbox_num = self._forward()
             output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
             output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
             return output
             return output
+        else:
+            bboxes, mlvl_scores = self._forward()
+            output = {'bbox': bboxes, 'scores': mlvl_scores}
+            return output

+ 68 - 0
paddlers/models/ppdet/modeling/architectures/retinanet.py

@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+import paddle
+
+__all__ = ['RetinaNet']
+
+
+@register
+class RetinaNet(BaseArch):
+    __category__ = 'architecture'
+
+    def __init__(self, backbone, neck, head):
+        super(RetinaNet, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.head = head
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        backbone = create(cfg['backbone'])
+
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+
+        kwargs = {'input_shape': neck.out_shape}
+        head = create(cfg['head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            'head': head,
+        }
+
+    def _forward(self):
+        body_feats = self.backbone(self.inputs)
+        neck_feats = self.neck(body_feats)
+
+        if self.training:
+            return self.head(neck_feats, self.inputs)
+        else:
+            head_outs = self.head(neck_feats)
+            bbox, bbox_num = self.head.post_process(
+                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
+            return {'bbox': bbox, 'bbox_num': bbox_num}
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()

+ 25 - 44
paddlers/models/ppdet/modeling/architectures/s2anet.py

@@ -1,15 +1,15 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -26,26 +26,21 @@ __all__ = ['S2ANet']
 @register
 @register
 class S2ANet(BaseArch):
 class S2ANet(BaseArch):
     __category__ = 'architecture'
     __category__ = 'architecture'
-    __inject__ = [
-        's2anet_head',
-        's2anet_bbox_post_process',
-    ]
+    __inject__ = ['head']
 
 
-    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
+    def __init__(self, backbone, neck, head):
         """
         """
         S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
         S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
 
 
         Args:
         Args:
             backbone (object): backbone instance
             backbone (object): backbone instance
             neck (object): `FPN` instance
             neck (object): `FPN` instance
-            s2anet_head (object): `S2ANetHead` instance
-            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
+            head (object): `Head` instance
         """
         """
         super(S2ANet, self).__init__()
         super(S2ANet, self).__init__()
         self.backbone = backbone
         self.backbone = backbone
         self.neck = neck
         self.neck = neck
-        self.s2anet_head = s2anet_head
-        self.s2anet_bbox_post_process = s2anet_bbox_post_process
+        self.s2anet_head = head
 
 
     @classmethod
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
     def from_config(cls, cfg, *args, **kwargs):
@@ -55,42 +50,28 @@ class S2ANet(BaseArch):
 
 
         out_shape = neck and neck.out_shape or backbone.out_shape
         out_shape = neck and neck.out_shape or backbone.out_shape
         kwargs = {'input_shape': out_shape}
         kwargs = {'input_shape': out_shape}
-        s2anet_head = create(cfg['s2anet_head'], **kwargs)
-        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
-                                          **kwargs)
+        head = create(cfg['head'], **kwargs)
 
 
-        return {
-            'backbone': backbone,
-            'neck': neck,
-            "s2anet_head": s2anet_head,
-            "s2anet_bbox_post_process": s2anet_bbox_post_process,
-        }
+        return {'backbone': backbone, 'neck': neck, "head": head}
 
 
     def _forward(self):
     def _forward(self):
         body_feats = self.backbone(self.inputs)
         body_feats = self.backbone(self.inputs)
         if self.neck is not None:
         if self.neck is not None:
             body_feats = self.neck(body_feats)
             body_feats = self.neck(body_feats)
-        self.s2anet_head(body_feats)
         if self.training:
         if self.training:
-            loss = self.s2anet_head.get_loss(self.inputs)
-            total_loss = paddle.add_n(list(loss.values()))
-            loss.update({'loss': total_loss})
+            loss = self.s2anet_head(body_feats, self.inputs)
             return loss
             return loss
         else:
         else:
-            im_shape = self.inputs['im_shape']
-            scale_factor = self.inputs['scale_factor']
-            nms_pre = self.s2anet_bbox_post_process.nms_pre
-            pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
-
+            head_outs = self.s2anet_head(body_feats)
             # post_process
             # post_process
-            pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
-                                                                  pred_bboxes)
+            bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
-            pred_bboxes = self.s2anet_bbox_post_process.get_pred(
-                pred_bboxes, bbox_num, im_shape, scale_factor)
-
+            im_shape = self.inputs['im_shape']
+            scale_factor = self.inputs['scale_factor']
+            bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
+                                               scale_factor)
             # output
             # output
-            output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
+            output = {'bbox': bboxes, 'bbox_num': bbox_num}
             return output
             return output
 
 
     def get_loss(self, ):
     def get_loss(self, ):

+ 12 - 12
paddlers/models/ppdet/modeling/architectures/ttfnet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import

+ 16 - 13
paddlers/models/ppdet/modeling/architectures/yolo.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
@@ -109,10 +109,13 @@ class YOLOv3(BaseArch):
                 if self.return_idx:
                 if self.return_idx:
                     _, bbox, bbox_num, _ = self.post_process(
                     _, bbox, bbox_num, _ = self.post_process(
                         yolo_head_outs, self.yolo_head.mask_anchors)
                         yolo_head_outs, self.yolo_head.mask_anchors)
-                else:
+                elif self.post_process is not None:
                     bbox, bbox_num = self.post_process(
                     bbox, bbox_num = self.post_process(
                         yolo_head_outs, self.yolo_head.mask_anchors,
                         yolo_head_outs, self.yolo_head.mask_anchors,
                         self.inputs['im_shape'], self.inputs['scale_factor'])
                         self.inputs['im_shape'], self.inputs['scale_factor'])
+                else:
+                    bbox, bbox_num = self.yolo_head.post_process(
+                        yolo_head_outs, self.inputs['scale_factor'])
                 output = {'bbox': bbox, 'bbox_num': bbox_num}
                 output = {'bbox': bbox, 'bbox_num': bbox_num}
 
 
             return output
             return output

+ 138 - 0
paddlers/models/ppdet/modeling/architectures/yolox.py

@@ -0,0 +1,138 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+import random
+import paddle
+import paddle.nn.functional as F
+import paddle.distributed as dist
+
+__all__ = ['YOLOX']
+
+
+@register
+class YOLOX(BaseArch):
+    """
+    YOLOX network, see https://arxiv.org/abs/2107.08430
+
+    Args:
+        backbone (nn.Layer): backbone instance
+        neck (nn.Layer): neck instance
+        head (nn.Layer): head instance
+        for_mot (bool): whether used for MOT or not
+        input_size (list[int]): initial scale, will be reset by self._preprocess()
+        size_stride (int): stride of the size range
+        size_range (list[int]): multi-scale range for training
+        random_interval (int): interval of iter to change self._input_size
+    """
+    __category__ = 'architecture'
+
+    def __init__(self,
+                 backbone='CSPDarkNet',
+                 neck='YOLOCSPPAN',
+                 head='YOLOXHead',
+                 for_mot=False,
+                 input_size=[640, 640],
+                 size_stride=32,
+                 size_range=[15, 25],
+                 random_interval=10):
+        super(YOLOX, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.head = head
+        self.for_mot = for_mot
+
+        self.input_size = input_size
+        self._input_size = paddle.to_tensor(input_size)
+        self.size_stride = size_stride
+        self.size_range = size_range
+        self.random_interval = random_interval
+        self._step = 0
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+
+        # fpn
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+
+        # head
+        kwargs = {'input_shape': neck.out_shape}
+        head = create(cfg['head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            "head": head,
+        }
+
+    def _forward(self):
+        if self.training:
+            self._preprocess()
+        body_feats = self.backbone(self.inputs)
+        neck_feats = self.neck(body_feats, self.for_mot)
+
+        if self.training:
+            yolox_losses = self.head(neck_feats, self.inputs)
+            yolox_losses.update({'size': self._input_size[0]})
+            return yolox_losses
+        else:
+            head_outs = self.head(neck_feats)
+            bbox, bbox_num = self.head.post_process(
+                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
+            return {'bbox': bbox, 'bbox_num': bbox_num}
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()
+
+    def _preprocess(self):
+        # YOLOX multi-scale training, interpolate resize before inputs of the network.
+        self._get_size()
+        scale_y = self._input_size[0] / self.input_size[0]
+        scale_x = self._input_size[1] / self.input_size[1]
+        if scale_x != 1 or scale_y != 1:
+            self.inputs['image'] = F.interpolate(
+                self.inputs['image'],
+                size=self._input_size,
+                mode='bilinear',
+                align_corners=False)
+            gt_bboxes = self.inputs['gt_bbox']
+            for i in range(len(gt_bboxes)):
+                if len(gt_bboxes[i]) > 0:
+                    gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x
+                    gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y
+            self.inputs['gt_bbox'] = gt_bboxes
+
+    def _get_size(self):
+        # random_interval = 10 as default, every 10 iters to change self._input_size
+        image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
+        if self._step % self.random_interval == 0:
+            size_factor = random.randint(*self.size_range)
+            size = [
+                self.size_stride * size_factor,
+                self.size_stride * int(size_factor * image_ratio)
+            ]
+            self._input_size = paddle.to_tensor(size)
+        self._step += 1

+ 2 - 0
paddlers/models/ppdet/modeling/assigners/__init__.py

@@ -16,8 +16,10 @@ from . import utils
 from . import task_aligned_assigner
 from . import task_aligned_assigner
 from . import atss_assigner
 from . import atss_assigner
 from . import simota_assigner
 from . import simota_assigner
+from . import max_iou_assigner
 
 
 from .utils import *
 from .utils import *
 from .task_aligned_assigner import *
 from .task_aligned_assigner import *
 from .atss_assigner import *
 from .atss_assigner import *
 from .simota_assigner import *
 from .simota_assigner import *
+from .max_iou_assigner import *

+ 33 - 27
paddlers/models/ppdet/modeling/assigners/atss_assigner.py

@@ -22,11 +22,13 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 from paddlers.models.ppdet.core.workspace import register
 from paddlers.models.ppdet.core.workspace import register
-from ..ops import iou_similarity
+from ..bbox_utils import iou_similarity, batch_iou_similarity
 from ..bbox_utils import bbox_center
 from ..bbox_utils import bbox_center
-from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor,
+from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
                     compute_max_iou_gt)
                     compute_max_iou_gt)
 
 
+__all__ = ['ATSSAssigner']
+
 
 
 @register
 @register
 class ATSSAssigner(nn.Layer):
 class ATSSAssigner(nn.Layer):
@@ -48,7 +50,6 @@ class ATSSAssigner(nn.Layer):
 
 
     def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
     def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
                              pad_gt_mask):
                              pad_gt_mask):
-        pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
         gt2anchor_distances_list = paddle.split(
         gt2anchor_distances_list = paddle.split(
             gt2anchor_distances, num_anchors_list, axis=-1)
             gt2anchor_distances, num_anchors_list, axis=-1)
         num_anchors_index = np.cumsum(num_anchors_list).tolist()
         num_anchors_index = np.cumsum(num_anchors_list).tolist()
@@ -58,15 +59,12 @@ class ATSSAssigner(nn.Layer):
         for distances, anchors_index in zip(gt2anchor_distances_list,
         for distances, anchors_index in zip(gt2anchor_distances_list,
                                             num_anchors_index):
                                             num_anchors_index):
             num_anchors = distances.shape[-1]
             num_anchors = distances.shape[-1]
-            topk_metrics, topk_idxs = paddle.topk(
+            _, topk_idxs = paddle.topk(
                 distances, self.topk, axis=-1, largest=False)
                 distances, self.topk, axis=-1, largest=False)
             topk_idxs_list.append(topk_idxs + anchors_index)
             topk_idxs_list.append(topk_idxs + anchors_index)
-            topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
-                                     paddle.zeros_like(topk_idxs))
-            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
-            is_in_topk = paddle.where(is_in_topk > 1,
-                                      paddle.zeros_like(is_in_topk), is_in_topk)
-            is_in_topk_list.append(is_in_topk.astype(gt2anchor_distances.dtype))
+            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
+                axis=-2).astype(gt2anchor_distances.dtype)
+            is_in_topk_list.append(is_in_topk * pad_gt_mask)
         is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
         is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
         topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
         topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
         return is_in_topk_list, topk_idxs_list
         return is_in_topk_list, topk_idxs_list
@@ -77,8 +75,10 @@ class ATSSAssigner(nn.Layer):
                 num_anchors_list,
                 num_anchors_list,
                 gt_labels,
                 gt_labels,
                 gt_bboxes,
                 gt_bboxes,
+                pad_gt_mask,
                 bg_index,
                 bg_index,
-                gt_scores=None):
+                gt_scores=None,
+                pred_bboxes=None):
         r"""This code is based on
         r"""This code is based on
             https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
             https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
 
 
@@ -99,18 +99,18 @@ class ATSSAssigner(nn.Layer):
             anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
             anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
                     "xmin, ymin, xmax, ymax" format
                     "xmin, ymin, xmax, ymax" format
             num_anchors_list (List): num of anchors in each level
             num_anchors_list (List): num of anchors in each level
-            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
-            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
             bg_index (int): background index
             bg_index (int): background index
-            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
+            gt_scores (Tensor|None, float32): Score of gt_bboxes,
                     shape(B, n, 1), if None, then it will initialize with one_hot label
                     shape(B, n, 1), if None, then it will initialize with one_hot label
+            pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
         Returns:
         Returns:
             assigned_labels (Tensor): (B, L)
             assigned_labels (Tensor): (B, L)
             assigned_bboxes (Tensor): (B, L, 4)
             assigned_bboxes (Tensor): (B, L, 4)
-            assigned_scores (Tensor): (B, L, C)
+            assigned_scores (Tensor): (B, L, C); if pred_bboxes is not None, the one-hot scores are multiplied by the IoU with the assigned gt
         """
         """
-        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
-            gt_labels, gt_bboxes, gt_scores)
         assert gt_labels.ndim == gt_bboxes.ndim and \
         assert gt_labels.ndim == gt_bboxes.ndim and \
                gt_bboxes.ndim == 3
                gt_bboxes.ndim == 3
 
 
@@ -119,7 +119,8 @@ class ATSSAssigner(nn.Layer):
 
 
         # negative batch
         # negative batch
         if num_max_boxes == 0:
         if num_max_boxes == 0:
-            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
+            assigned_labels = paddle.full(
+                [batch_size, num_anchors], bg_index, dtype='int32')
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_scores = paddle.zeros(
             assigned_scores = paddle.zeros(
                 [batch_size, num_anchors, self.num_classes])
                 [batch_size, num_anchors, self.num_classes])
@@ -149,9 +150,8 @@ class ATSSAssigner(nn.Layer):
         iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
         iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
         iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
         iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
                         iou_threshold.std(axis=-1, keepdim=True)
                         iou_threshold.std(axis=-1, keepdim=True)
-        is_in_topk = paddle.where(
-            iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
-            is_in_topk, paddle.zeros_like(is_in_topk))
+        is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
+                                  paddle.zeros_like(is_in_topk))
 
 
         # 6. check the positive sample's center in gt, [B, n, L]
         # 6. check the positive sample's center in gt, [B, n, L]
         is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
         is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
@@ -178,9 +178,6 @@ class ATSSAssigner(nn.Layer):
                                          mask_positive)
                                          mask_positive)
             mask_positive_sum = mask_positive.sum(axis=-2)
             mask_positive_sum = mask_positive.sum(axis=-2)
         assigned_gt_index = mask_positive.argmax(axis=-2)
         assigned_gt_index = mask_positive.argmax(axis=-2)
-        assert mask_positive_sum.max() == 1, \
-            ("one anchor just assign one gt, but received not equals 1. "
-             "Received: %f" % mask_positive_sum.max().item())
 
 
         # assigned target
         # assigned target
         batch_ind = paddle.arange(
         batch_ind = paddle.arange(
@@ -197,10 +194,19 @@ class ATSSAssigner(nn.Layer):
             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
 
 
-        assigned_scores = F.one_hot(assigned_labels, self.num_classes)
-        if gt_scores is not None:
+        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
+        ind = list(range(self.num_classes + 1))
+        ind.remove(bg_index)
+        assigned_scores = paddle.index_select(
+            assigned_scores, paddle.to_tensor(ind), axis=-1)
+        if pred_bboxes is not None:
+            # assigned iou
+            ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
+            ious = ious.max(axis=-2).unsqueeze(-1)
+            assigned_scores *= ious
+        elif gt_scores is not None:
             gather_scores = paddle.gather(
             gather_scores = paddle.gather(
-                pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
+                gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
             gather_scores = gather_scores.reshape([batch_size, num_anchors])
             gather_scores = gather_scores.reshape([batch_size, num_anchors])
             gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
             gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
                                          paddle.zeros_like(gather_scores))
                                          paddle.zeros_like(gather_scores))

+ 54 - 0
paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py

@@ -0,0 +1,54 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register
+from paddlers.models.ppdet.modeling.proposal_generator.target import label_box
+
+__all__ = ['MaxIoUAssigner']
+
+
+@register
+class MaxIoUAssigner(object):
+    """A standard bbox assigner based on max IoU that uses ppdet's label_box
+    as its backend.
+    Args:
+        positive_overlap (float): threshold for defining positive samples
+        negative_overlap (float): threshold for defining negative samples
+        allow_low_quality (bool): whether to lower the IoU threshold if a GT
+            poorly overlaps with candidate bboxes
+    """
+
+    def __init__(self,
+                 positive_overlap,
+                 negative_overlap,
+                 allow_low_quality=True):
+        self.positive_overlap = positive_overlap
+        self.negative_overlap = negative_overlap
+        self.allow_low_quality = allow_low_quality
+
+    def __call__(self, bboxes, gt_bboxes):
+        matches, match_labels = label_box(
+            bboxes,
+            gt_bboxes,
+            positive_overlap=self.positive_overlap,
+            negative_overlap=self.negative_overlap,
+            allow_low_quality=self.allow_low_quality,
+            ignore_thresh=-1,
+            is_crowd=None,
+            assign_on_cpu=False)
+        return matches, match_labels

+ 4 - 1
paddlers/models/ppdet/modeling/assigners/simota_assigner.py

@@ -115,7 +115,10 @@ class SimOTAAssigner(object):
     def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
     def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
         match_matrix = np.zeros_like(cost_matrix.numpy())
         match_matrix = np.zeros_like(cost_matrix.numpy())
         # select candidate topk ious for dynamic-k calculation
         # select candidate topk ious for dynamic-k calculation
-        topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
+        topk_ious, _ = paddle.topk(
+            pairwise_ious,
+            min(self.candidate_topk, pairwise_ious.shape[0]),
+            axis=0)
         # calculate dynamic k for each gt
         # calculate dynamic k for each gt
         dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
         dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
         for gt_idx in range(num_gt):
         for gt_idx in range(num_gt):

+ 20 - 18
paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py

@@ -21,10 +21,12 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 
 
 from paddlers.models.ppdet.core.workspace import register
 from paddlers.models.ppdet.core.workspace import register
-from ..bbox_utils import iou_similarity
-from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes,
+from ..bbox_utils import batch_iou_similarity
+from .utils import (gather_topk_anchors, check_points_inside_bboxes,
                     compute_max_iou_anchor)
                     compute_max_iou_anchor)
 
 
+__all__ = ['TaskAlignedAssigner']
+
 
 
 @register
 @register
 class TaskAlignedAssigner(nn.Layer):
 class TaskAlignedAssigner(nn.Layer):
@@ -43,8 +45,10 @@ class TaskAlignedAssigner(nn.Layer):
                 pred_scores,
                 pred_scores,
                 pred_bboxes,
                 pred_bboxes,
                 anchor_points,
                 anchor_points,
+                num_anchors_list,
                 gt_labels,
                 gt_labels,
                 gt_bboxes,
                 gt_bboxes,
+                pad_gt_mask,
                 bg_index,
                 bg_index,
                 gt_scores=None):
                 gt_scores=None):
         r"""This code is based on
         r"""This code is based on
@@ -61,20 +65,18 @@ class TaskAlignedAssigner(nn.Layer):
             pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
             pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
             pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
             pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
             anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
             anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
-            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
-            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
+            num_anchors_list (List): num of anchors in each level, one entry per level
+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
             bg_index (int): background index
             bg_index (int): background index
-            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
-                    shape(B, n, 1), if None, then it will initialize with one_hot label
+            gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
         Returns:
         Returns:
             assigned_labels (Tensor): (B, L)
             assigned_labels (Tensor): (B, L)
             assigned_bboxes (Tensor): (B, L, 4)
             assigned_bboxes (Tensor): (B, L, 4)
             assigned_scores (Tensor): (B, L, C)
             assigned_scores (Tensor): (B, L, C)
         """
         """
         assert pred_scores.ndim == pred_bboxes.ndim
         assert pred_scores.ndim == pred_bboxes.ndim
-
-        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
-            gt_labels, gt_bboxes, gt_scores)
         assert gt_labels.ndim == gt_bboxes.ndim and \
         assert gt_labels.ndim == gt_bboxes.ndim and \
                gt_bboxes.ndim == 3
                gt_bboxes.ndim == 3
 
 
@@ -83,14 +85,15 @@ class TaskAlignedAssigner(nn.Layer):
 
 
         # negative batch
         # negative batch
         if num_max_boxes == 0:
         if num_max_boxes == 0:
-            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
+            assigned_labels = paddle.full(
+                [batch_size, num_anchors], bg_index, dtype='int32')
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_scores = paddle.zeros(
             assigned_scores = paddle.zeros(
                 [batch_size, num_anchors, num_classes])
                 [batch_size, num_anchors, num_classes])
             return assigned_labels, assigned_bboxes, assigned_scores
             return assigned_labels, assigned_bboxes, assigned_scores
 
 
         # compute iou between gt and pred bbox, [B, n, L]
         # compute iou between gt and pred bbox, [B, n, L]
-        ious = iou_similarity(gt_bboxes, pred_bboxes)
+        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
         # gather pred bboxes class score
         # gather pred bboxes class score
         pred_scores = pred_scores.transpose([0, 2, 1])
         pred_scores = pred_scores.transpose([0, 2, 1])
         batch_ind = paddle.arange(
         batch_ind = paddle.arange(
@@ -109,9 +112,7 @@ class TaskAlignedAssigner(nn.Layer):
         # select topk largest alignment metrics pred bbox as candidates
         # select topk largest alignment metrics pred bbox as candidates
         # for each gt, [B, n, L]
         # for each gt, [B, n, L]
         is_in_topk = gather_topk_anchors(
         is_in_topk = gather_topk_anchors(
-            alignment_metrics * is_in_gts,
-            self.topk,
-            topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))
+            alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
 
 
         # select positive sample, [B, n, L]
         # select positive sample, [B, n, L]
         mask_positive = is_in_topk * is_in_gts * pad_gt_mask
         mask_positive = is_in_topk * is_in_gts * pad_gt_mask
@@ -127,9 +128,6 @@ class TaskAlignedAssigner(nn.Layer):
                                          mask_positive)
                                          mask_positive)
             mask_positive_sum = mask_positive.sum(axis=-2)
             mask_positive_sum = mask_positive.sum(axis=-2)
         assigned_gt_index = mask_positive.argmax(axis=-2)
         assigned_gt_index = mask_positive.argmax(axis=-2)
-        assert mask_positive_sum.max() == 1, \
-            ("one anchor just assign one gt, but received not equals 1. "
-             "Received: %f" % mask_positive_sum.max().item())
 
 
         # assigned target
         # assigned target
         assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
         assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
@@ -144,7 +142,11 @@ class TaskAlignedAssigner(nn.Layer):
             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
 
 
-        assigned_scores = F.one_hot(assigned_labels, num_classes)
+        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
+        ind = list(range(num_classes + 1))
+        ind.remove(bg_index)
+        assigned_scores = paddle.index_select(
+            assigned_scores, paddle.to_tensor(ind), axis=-1)
         # rescale alignment metrics
         # rescale alignment metrics
         alignment_metrics *= mask_positive
         alignment_metrics *= mask_positive
         max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
         max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)

+ 51 - 20
paddlers/models/ppdet/modeling/assigners/utils.py

@@ -88,7 +88,7 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
         largest (bool) : largest is a flag, if set to true,
         largest (bool) : largest is a flag, if set to true,
             algorithm will sort by descending order, otherwise sort by
             algorithm will sort by descending order, otherwise sort by
             ascending order. Default: True
             ascending order. Default: True
-        topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
+        topk_mask (Tensor, float32): shape[B, n, 1], ignore bbox mask,
             Default: None
             Default: None
         eps (float): Default: 1e-9
         eps (float): Default: 1e-9
     Returns:
     Returns:
@@ -98,20 +98,22 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
     topk_metrics, topk_idxs = paddle.topk(
     topk_metrics, topk_idxs = paddle.topk(
         metrics, topk, axis=-1, largest=largest)
         metrics, topk, axis=-1, largest=largest)
     if topk_mask is None:
     if topk_mask is None:
-        topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > eps).tile(
-            [1, 1, topk])
-    topk_idxs = paddle.where(topk_mask, topk_idxs, paddle.zeros_like(topk_idxs))
-    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
-    is_in_topk = paddle.where(is_in_topk > 1,
-                              paddle.zeros_like(is_in_topk), is_in_topk)
-    return is_in_topk.astype(metrics.dtype)
+        topk_mask = (
+            topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
+    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
+        axis=-2).astype(metrics.dtype)
+    return is_in_topk * topk_mask
 
 
 
 
-def check_points_inside_bboxes(points, bboxes, eps=1e-9):
+def check_points_inside_bboxes(points,
+                               bboxes,
+                               center_radius_tensor=None,
+                               eps=1e-9):
     r"""
     r"""
     Args:
     Args:
         points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
         points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
         bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
         bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
+        center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
         eps (float): Default: 1e-9
         eps (float): Default: 1e-9
     Returns:
     Returns:
         is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
         is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
@@ -119,12 +121,28 @@ def check_points_inside_bboxes(points, bboxes, eps=1e-9):
     points = points.unsqueeze([0, 1])
     points = points.unsqueeze([0, 1])
     x, y = points.chunk(2, axis=-1)
     x, y = points.chunk(2, axis=-1)
     xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
     xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
+    # check whether `points` is in `bboxes`
     l = x - xmin
     l = x - xmin
     t = y - ymin
     t = y - ymin
     r = xmax - x
     r = xmax - x
     b = ymax - y
     b = ymax - y
-    bbox_ltrb = paddle.concat([l, t, r, b], axis=-1)
-    return (bbox_ltrb.min(axis=-1) > eps).astype(bboxes.dtype)
+    delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
+    is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
+    if center_radius_tensor is not None:
+        # check whether `points` is in `center_radius`
+        center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
+        cx = (xmin + xmax) * 0.5
+        cy = (ymin + ymax) * 0.5
+        l = x - (cx - center_radius_tensor)
+        t = y - (cy - center_radius_tensor)
+        r = (cx + center_radius_tensor) - x
+        b = (cy + center_radius_tensor) - y
+        delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
+        is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
+        return (paddle.logical_and(is_in_bboxes, is_in_center),
+                paddle.logical_or(is_in_bboxes, is_in_center))
+
+    return is_in_bboxes.astype(bboxes.dtype)
 
 
 
 
 def compute_max_iou_anchor(ious):
 def compute_max_iou_anchor(ious):
@@ -158,7 +176,8 @@ def compute_max_iou_gt(ious):
 def generate_anchors_for_grid_cell(feats,
 def generate_anchors_for_grid_cell(feats,
                                    fpn_strides,
                                    fpn_strides,
                                    grid_cell_size=5.0,
                                    grid_cell_size=5.0,
-                                   grid_cell_offset=0.5):
+                                   grid_cell_offset=0.5,
+                                   dtype='float32'):
     r"""
     r"""
     Like ATSS, generate anchors based on grid size.
     Like ATSS, generate anchors based on grid size.
     Args:
     Args:
@@ -167,14 +186,16 @@ def generate_anchors_for_grid_cell(feats,
         grid_cell_size (float): anchor size
         grid_cell_size (float): anchor size
         grid_cell_offset (float): The range is between 0 and 1.
         grid_cell_offset (float): The range is between 0 and 1.
     Returns:
     Returns:
-        anchors (List[Tensor]): shape[s, (l, 4)]
-        num_anchors_list (List[int]): shape[s]
-        stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
+        anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
+        anchor_points (Tensor): shape[l, 2], "x, y" format.
+        num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
+        stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
     """
     """
     assert len(feats) == len(fpn_strides)
     assert len(feats) == len(fpn_strides)
     anchors = []
     anchors = []
+    anchor_points = []
     num_anchors_list = []
     num_anchors_list = []
-    stride_tensor_list = []
+    stride_tensor = []
     for feat, stride in zip(feats, fpn_strides):
     for feat, stride in zip(feats, fpn_strides):
         _, _, h, w = feat.shape
         _, _, h, w = feat.shape
         cell_half_size = grid_cell_size * stride * 0.5
         cell_half_size = grid_cell_size * stride * 0.5
@@ -186,9 +207,19 @@ def generate_anchors_for_grid_cell(feats,
                 shift_x - cell_half_size, shift_y - cell_half_size,
                 shift_x - cell_half_size, shift_y - cell_half_size,
                 shift_x + cell_half_size, shift_y + cell_half_size
                 shift_x + cell_half_size, shift_y + cell_half_size
             ],
             ],
-            axis=-1).astype(feat.dtype)
+            axis=-1).astype(dtype)
+        anchor_point = paddle.stack([shift_x, shift_y], axis=-1).astype(dtype)
+
         anchors.append(anchor.reshape([-1, 4]))
         anchors.append(anchor.reshape([-1, 4]))
+        anchor_points.append(anchor_point.reshape([-1, 2]))
         num_anchors_list.append(len(anchors[-1]))
         num_anchors_list.append(len(anchors[-1]))
-        stride_tensor_list.append(
-            paddle.full([num_anchors_list[-1], 1], stride))
-    return anchors, num_anchors_list, stride_tensor_list
+        stride_tensor.append(
+            paddle.full(
+                [num_anchors_list[-1], 1], stride, dtype=dtype))
+    anchors = paddle.concat(anchors)
+    anchors.stop_gradient = True
+    anchor_points = paddle.concat(anchor_points)
+    anchor_points.stop_gradient = True
+    stride_tensor = paddle.concat(stride_tensor)
+    stride_tensor.stop_gradient = True
+    return anchors, anchor_points, num_anchors_list, stride_tensor

+ 23 - 12
paddlers/models/ppdet/modeling/backbones/__init__.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 from . import vgg
 from . import vgg
@@ -29,6 +29,11 @@ from . import swin_transformer
 from . import lcnet
 from . import lcnet
 from . import hardnet
 from . import hardnet
 from . import esnet
 from . import esnet
+from . import cspresnet
+from . import csp_darknet
+from . import convnext
+from . import vision_transformer
+from . import mobileone
 
 
 from .vgg import *
 from .vgg import *
 from .resnet import *
 from .resnet import *
@@ -47,3 +52,9 @@ from .swin_transformer import *
 from .lcnet import *
 from .lcnet import *
 from .hardnet import *
 from .hardnet import *
 from .esnet import *
 from .esnet import *
+from .cspresnet import *
+from .csp_darknet import *
+from .convnext import *
+from .vision_transformer import *
+from .vision_transformer import *
+from .mobileone import *

+ 1 - 1
paddlers/models/ppdet/modeling/backbones/blazenet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 245 - 0
paddlers/models/ppdet/modeling/backbones/convnext.py

@@ -0,0 +1,245 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+'''
+Modified from https://github.com/facebookresearch/ConvNeXt
+Copyright (c) Meta Platforms, Inc. and affiliates.
+All rights reserved.
+This source code is licensed under the license found in the
+LICENSE file in the root directory of this source tree.
+'''
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import Constant
+
+import numpy as np
+
+from paddlers.models.ppdet.core.workspace import register, serializable
+from ..shape_spec import ShapeSpec
+from .transformer_utils import DropPath, trunc_normal_, zeros_
+
+__all__ = ['ConvNeXt']
+
+
+class Block(nn.Layer):
+    r""" ConvNeXt Block. There are two equivalent implementations:
+    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+    We use (2), which the original PyTorch implementation reports as slightly faster
+    
+    Args:
+        dim (int): Number of input channels.
+        drop_path (float): Stochastic depth rate. Default: 0.0
+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+    """
+
+    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
+        super().__init__()
+        self.dwconv = nn.Conv2D(
+            dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
+        self.norm = LayerNorm(dim, eps=1e-6)
+        self.pwconv1 = nn.Linear(
+            dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
+        self.act = nn.GELU()
+        self.pwconv2 = nn.Linear(4 * dim, dim)
+
+        if layer_scale_init_value > 0:
+            self.gamma = self.create_parameter(
+                shape=(dim, ),
+                attr=ParamAttr(initializer=Constant(layer_scale_init_value)))
+        else:
+            self.gamma = None
+
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity(
+        )
+
+    def forward(self, x):
+        input = x
+        x = self.dwconv(x)
+        x = x.transpose([0, 2, 3, 1])
+        x = self.norm(x)
+        x = self.pwconv1(x)
+        x = self.act(x)
+        x = self.pwconv2(x)
+        if self.gamma is not None:
+            x = self.gamma * x
+        x = x.transpose([0, 3, 1, 2])
+        x = input + self.drop_path(x)
+        return x
+
+
+class LayerNorm(nn.Layer):
+    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 
+    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 
+    shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
+    with shape (batch_size, channels, height, width).
+    """
+
+    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
+        super().__init__()
+
+        self.weight = self.create_parameter(
+            shape=(normalized_shape, ),
+            attr=ParamAttr(initializer=Constant(1.)))
+        self.bias = self.create_parameter(
+            shape=(normalized_shape, ),
+            attr=ParamAttr(initializer=Constant(0.)))
+
+        self.eps = eps
+        self.data_format = data_format
+        if self.data_format not in ["channels_last", "channels_first"]:
+            raise NotImplementedError
+        self.normalized_shape = (normalized_shape, )
+
+    def forward(self, x):
+        if self.data_format == "channels_last":
+            return F.layer_norm(x, self.normalized_shape, self.weight,
+                                self.bias, self.eps)
+        elif self.data_format == "channels_first":
+            u = x.mean(1, keepdim=True)
+            s = (x - u).pow(2).mean(1, keepdim=True)
+            x = (x - u) / paddle.sqrt(s + self.eps)
+            x = self.weight[:, None, None] * x + self.bias[:, None, None]
+            return x
+
+
+@register
+@serializable
+class ConvNeXt(nn.Layer):
+    r""" ConvNeXt
+        A Paddle implementation of `A ConvNet for the 2020s` -
+          https://arxiv.org/pdf/2201.03545.pdf
+
+    Args:
+        in_chans (int): Number of input image channels. Default: 3
+        arch (str): Key into `arch_settings` ('tiny', 'small', 'base', 'large' or 'xlarge') that selects
+            `depths` (number of blocks at each stage) and `dims` (feature dimension at each stage). Default: 'tiny'
+        drop_path_rate (float): Stochastic depth rate. Default: 0.
+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+    """
+
+    arch_settings = {
+        'tiny': {
+            'depths': [3, 3, 9, 3],
+            'dims': [96, 192, 384, 768]
+        },
+        'small': {
+            'depths': [3, 3, 27, 3],
+            'dims': [96, 192, 384, 768]
+        },
+        'base': {
+            'depths': [3, 3, 27, 3],
+            'dims': [128, 256, 512, 1024]
+        },
+        'large': {
+            'depths': [3, 3, 27, 3],
+            'dims': [192, 384, 768, 1536]
+        },
+        'xlarge': {
+            'depths': [3, 3, 27, 3],
+            'dims': [256, 512, 1024, 2048]
+        },
+    }
+
+    def __init__(
+            self,
+            arch='tiny',
+            in_chans=3,
+            drop_path_rate=0.,
+            layer_scale_init_value=1e-6,
+            return_idx=[1, 2, 3],
+            norm_output=True,
+            pretrained=None, ):
+        super().__init__()
+        depths = self.arch_settings[arch]['depths']
+        dims = self.arch_settings[arch]['dims']
+        self.downsample_layers = nn.LayerList(
+        )  # stem and 3 intermediate downsampling conv layers
+        stem = nn.Sequential(
+            nn.Conv2D(
+                in_chans, dims[0], kernel_size=4, stride=4),
+            LayerNorm(
+                dims[0], eps=1e-6, data_format="channels_first"))
+        self.downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.Sequential(
+                LayerNorm(
+                    dims[i], eps=1e-6, data_format="channels_first"),
+                nn.Conv2D(
+                    dims[i], dims[i + 1], kernel_size=2, stride=2), )
+            self.downsample_layers.append(downsample_layer)
+
+        self.stages = nn.LayerList(
+        )  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = [x for x in np.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+        for i in range(4):
+            stage = nn.Sequential(*[
+                Block(
+                    dim=dims[i],
+                    drop_path=dp_rates[cur + j],
+                    layer_scale_init_value=layer_scale_init_value)
+                for j in range(depths[i])
+            ])
+            self.stages.append(stage)
+            cur += depths[i]
+
+        self.return_idx = return_idx
+        self.dims = [dims[i] for i in return_idx]  # [::-1]
+
+        self.norm_output = norm_output
+        if norm_output:
+            self.norms = nn.LayerList([
+                LayerNorm(
+                    c, eps=1e-6, data_format="channels_first")
+                for c in self.dims
+            ])
+
+        self.apply(self._init_weights)
+
+        if pretrained is not None:
+            if 'http' in pretrained:  #URL
+                path = paddle.utils.download.get_weights_path_from_url(
+                    pretrained)
+            else:  #model in local path
+                path = pretrained
+            self.set_state_dict(paddle.load(path))
+
+    def _init_weights(self, m):
+        if isinstance(m, (nn.Conv2D, nn.Linear)):
+            trunc_normal_(m.weight)
+            zeros_(m.bias)
+
+    def forward_features(self, x):
+        output = []
+        for i in range(4):
+            x = self.downsample_layers[i](x)
+            x = self.stages[i](x)
+            output.append(x)
+
+        outputs = [output[i] for i in self.return_idx]
+        if self.norm_output:
+            outputs = [self.norms[i](out) for i, out in enumerate(outputs)]
+
+        return outputs
+
+    def forward(self, x):
+        x = self.forward_features(x['image'])
+        return x
+
+    @property
+    def out_shape(self):
+        return [ShapeSpec(channels=c) for c in self.dims]

+ 404 - 0
paddlers/models/ppdet/modeling/backbones/csp_darknet.py

@@ -0,0 +1,404 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddlers.models.ppdet.core.workspace import register, serializable
+from paddlers.models.ppdet.modeling.initializer import conv_init_
+from ..shape_spec import ShapeSpec
+
+__all__ = [
+    'CSPDarkNet', 'BaseConv', 'DWConv', 'BottleNeck', 'SPPLayer', 'SPPFLayer'
+]
+
+
+class BaseConv(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ksize,
+                 stride,
+                 groups=1,
+                 bias=False,
+                 act="silu"):
+        super(BaseConv, self).__init__()
+        self.conv = nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size=ksize,
+            stride=stride,
+            padding=(ksize - 1) // 2,
+            groups=groups,
+            bias_attr=bias)
+        self.bn = nn.BatchNorm2D(
+            out_channels,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+        self._init_weights()
+
+    def _init_weights(self):
+        conv_init_(self.conv)
+
+    def forward(self, x):
+        # SiLU is always applied, written as 'x * F.sigmoid(x)'; the `act` argument is not used here
+        x = self.bn(self.conv(x))
+        y = x * F.sigmoid(x)
+        return y
+
+
+class DWConv(nn.Layer):
+    """Depthwise Conv"""
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ksize,
+                 stride=1,
+                 bias=False,
+                 act="silu"):
+        super(DWConv, self).__init__()
+        self.dw_conv = BaseConv(
+            in_channels,
+            in_channels,
+            ksize=ksize,
+            stride=stride,
+            groups=in_channels,
+            bias=bias,
+            act=act)
+        self.pw_conv = BaseConv(
+            in_channels,
+            out_channels,
+            ksize=1,
+            stride=1,
+            groups=1,
+            bias=bias,
+            act=act)
+
+    def forward(self, x):
+        return self.pw_conv(self.dw_conv(x))
+
+
+class Focus(nn.Layer):
+    """Focus width and height information into channel space, used in YOLOX."""
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ksize=3,
+                 stride=1,
+                 bias=False,
+                 act="silu"):
+        super(Focus, self).__init__()
+        self.conv = BaseConv(
+            in_channels * 4,
+            out_channels,
+            ksize=ksize,
+            stride=stride,
+            bias=bias,
+            act=act)
+
+    def forward(self, inputs):
+        # inputs [bs, C, H, W] -> outputs [bs, 4C, H/2, W/2]
+        top_left = inputs[:, :, 0::2, 0::2]
+        top_right = inputs[:, :, 0::2, 1::2]
+        bottom_left = inputs[:, :, 1::2, 0::2]
+        bottom_right = inputs[:, :, 1::2, 1::2]
+        outputs = paddle.concat(
+            [top_left, bottom_left, top_right, bottom_right], 1)
+        return self.conv(outputs)
+
+
+class BottleNeck(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 shortcut=True,
+                 expansion=0.5,
+                 depthwise=False,
+                 bias=False,
+                 act="silu"):
+        super(BottleNeck, self).__init__()
+        hidden_channels = int(out_channels * expansion)
+        Conv = DWConv if depthwise else BaseConv
+        self.conv1 = BaseConv(
+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+        self.conv2 = Conv(
+            hidden_channels,
+            out_channels,
+            ksize=3,
+            stride=1,
+            bias=bias,
+            act=act)
+        self.add_shortcut = shortcut and in_channels == out_channels
+
+    def forward(self, x):
+        y = self.conv2(self.conv1(x))
+        if self.add_shortcut:
+            y = y + x
+        return y
+
+
+class SPPLayer(nn.Layer):
+    """Spatial Pyramid Pooling (SPP) layer used in YOLOv3-SPP and YOLOX"""
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_sizes=(5, 9, 13),
+                 bias=False,
+                 act="silu"):
+        super(SPPLayer, self).__init__()
+        hidden_channels = in_channels // 2
+        self.conv1 = BaseConv(
+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+        self.maxpoolings = nn.LayerList([
+            nn.MaxPool2D(
+                kernel_size=ks, stride=1, padding=ks // 2)
+            for ks in kernel_sizes
+        ])
+        conv2_channels = hidden_channels * (len(kernel_sizes) + 1)
+        self.conv2 = BaseConv(
+            conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = paddle.concat([x] + [mp(x) for mp in self.maxpoolings], axis=1)
+        x = self.conv2(x)
+        return x
+
+
+class SPPFLayer(nn.Layer):
+    """ Spatial Pyramid Pooling - Fast (SPPF) layer used in YOLOv5 by Glenn Jocher,
+        equivalent to SPP(k=(5, 9, 13))
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 ksize=5,
+                 bias=False,
+                 act='silu'):
+        super(SPPFLayer, self).__init__()
+        hidden_channels = in_channels // 2
+        self.conv1 = BaseConv(
+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+        self.maxpooling = nn.MaxPool2D(
+            kernel_size=ksize, stride=1, padding=ksize // 2)
+        conv2_channels = hidden_channels * 4
+        self.conv2 = BaseConv(
+            conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        y1 = self.maxpooling(x)
+        y2 = self.maxpooling(y1)
+        y3 = self.maxpooling(y2)
+        concats = paddle.concat([x, y1, y2, y3], axis=1)
+        out = self.conv2(concats)
+        return out
+
+
+class CSPLayer(nn.Layer):
+    """CSP (Cross Stage Partial) layer with 3 convs, named C3 in YOLOv5"""
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 num_blocks=1,
+                 shortcut=True,
+                 expansion=0.5,
+                 depthwise=False,
+                 bias=False,
+                 act="silu"):
+        super(CSPLayer, self).__init__()
+        hidden_channels = int(out_channels * expansion)
+        self.conv1 = BaseConv(
+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+        self.conv2 = BaseConv(
+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+        self.bottlenecks = nn.Sequential(*[
+            BottleNeck(
+                hidden_channels,
+                hidden_channels,
+                shortcut=shortcut,
+                expansion=1.0,
+                depthwise=depthwise,
+                bias=bias,
+                act=act) for _ in range(num_blocks)
+        ])
+        self.conv3 = BaseConv(
+            hidden_channels * 2,
+            out_channels,
+            ksize=1,
+            stride=1,
+            bias=bias,
+            act=act)
+
+    def forward(self, x):
+        x_1 = self.conv1(x)
+        x_1 = self.bottlenecks(x_1)
+        x_2 = self.conv2(x)
+        x = paddle.concat([x_1, x_2], axis=1)
+        x = self.conv3(x)
+        return x
+
+
+@register
+@serializable
+class CSPDarkNet(nn.Layer):
+    """
+    CSPDarkNet backbone.
+    Args:
+        arch (str): Architecture of CSPDarkNet, from {P5, P6, X}, default as X,
+            and 'X' means used in YOLOX, 'P5/P6' means used in YOLOv5.
+        depth_mult (float): Depth multiplier, multiply number of blocks in
+            CSPLayer, default as 1.0.
+        width_mult (float): Width multiplier, multiply number of channels in
+            each layer, default as 1.0.
+        depthwise (bool): Whether to use depth-wise conv layer.
+        act (str): Activation function type, default as 'silu'.
+        return_idx (list): Index of stages whose feature maps are returned.
+    """
+
+    __shared__ = ['depth_mult', 'width_mult', 'act', 'trt']
+
+    # in_channels, out_channels, num_blocks, add_shortcut, use_spp(use_sppf)
+    # 'X' means setting used in YOLOX, 'P5/P6' means setting used in YOLOv5.
+    arch_settings = {
+        'X': [[64, 128, 3, True, False], [128, 256, 9, True, False],
+              [256, 512, 9, True, False], [512, 1024, 3, False, True]],
+        'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False],
+               [256, 512, 9, True, False], [512, 1024, 3, True, True]],
+        'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False],
+               [256, 512, 9, True, False], [512, 768, 3, True, False],
+               [768, 1024, 3, True, True]],
+    }
+
+    def __init__(self,
+                 arch='X',
+                 depth_mult=1.0,
+                 width_mult=1.0,
+                 depthwise=False,
+                 act='silu',
+                 trt=False,
+                 return_idx=[2, 3, 4]):
+        super(CSPDarkNet, self).__init__()
+        self.arch = arch
+        self.return_idx = return_idx
+        Conv = DWConv if depthwise else BaseConv
+        arch_setting = self.arch_settings[arch]
+        base_channels = int(arch_setting[0][0] * width_mult)
+
+        # Note: differences between the latest YOLOv5 and the original YOLOX
+        # 1. self.stem, use Conv(in YOLOv5) or Focus(in YOLOX)
+        # 2. use SPPF(in YOLOv5) or SPP(in YOLOX)
+        # 3. put SPPF after(YOLOv5) or SPP before(YOLOX) the last cspdark block's CSPLayer
+        # 4. whether the CSPLayer of the SPPF(SPP) stage adds a shortcut, True in YOLOv5, False in YOLOX
+        if arch in ['P5', 'P6']:
+            # in the latest YOLOv5, use Conv stem, and SPPF (fast, only single spp kernel size)
+            self.stem = Conv(
+                3, base_channels, ksize=6, stride=2, bias=False, act=act)
+            spp_kernal_sizes = 5
+        elif arch in ['X']:
+            # in the original YOLOX, use Focus stem, and SPP (three spp kernel sizes)
+            self.stem = Focus(
+                3, base_channels, ksize=3, stride=1, bias=False, act=act)
+            spp_kernal_sizes = (5, 9, 13)
+        else:
+            raise AttributeError("Unsupported arch type: {}".format(arch))
+
+        _out_channels = [base_channels]
+        layers_num = 1
+        self.csp_dark_blocks = []
+
+        for i, (in_channels, out_channels, num_blocks, shortcut,
+                use_spp) in enumerate(arch_setting):
+            in_channels = int(in_channels * width_mult)
+            out_channels = int(out_channels * width_mult)
+            _out_channels.append(out_channels)
+            num_blocks = max(round(num_blocks * depth_mult), 1)
+            stage = []
+
+            conv_layer = self.add_sublayer(
+                'layers{}.stage{}.conv_layer'.format(layers_num, i + 1),
+                Conv(
+                    in_channels, out_channels, 3, 2, bias=False, act=act))
+            stage.append(conv_layer)
+            layers_num += 1
+
+            if use_spp and arch in ['X']:
+                # in YOLOX use SPPLayer
+                spp_layer = self.add_sublayer(
+                    'layers{}.stage{}.spp_layer'.format(layers_num, i + 1),
+                    SPPLayer(
+                        out_channels,
+                        out_channels,
+                        kernel_sizes=spp_kernal_sizes,
+                        bias=False,
+                        act=act))
+                stage.append(spp_layer)
+                layers_num += 1
+
+            csp_layer = self.add_sublayer(
+                'layers{}.stage{}.csp_layer'.format(layers_num, i + 1),
+                CSPLayer(
+                    out_channels,
+                    out_channels,
+                    num_blocks=num_blocks,
+                    shortcut=shortcut,
+                    depthwise=depthwise,
+                    bias=False,
+                    act=act))
+            stage.append(csp_layer)
+            layers_num += 1
+
+            if use_spp and arch in ['P5', 'P6']:
+                # in latest YOLOv5 use SPPFLayer instead of SPPLayer
+                sppf_layer = self.add_sublayer(
+                    'layers{}.stage{}.sppf_layer'.format(layers_num, i + 1),
+                    SPPFLayer(
+                        out_channels,
+                        out_channels,
+                        ksize=5,
+                        bias=False,
+                        act=act))
+                stage.append(sppf_layer)
+                layers_num += 1
+
+            self.csp_dark_blocks.append(nn.Sequential(*stage))
+
+        self._out_channels = [_out_channels[i] for i in self.return_idx]
+        self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]
+
+    def forward(self, inputs):
+        x = inputs['image']
+        outputs = []
+        x = self.stem(x)
+        for i, layer in enumerate(self.csp_dark_blocks):
+            x = layer(x)
+            if i + 1 in self.return_idx:
+                outputs.append(x)
+        return outputs
+
+    @property
+    def out_shape(self):
+        return [
+            ShapeSpec(
+                channels=c, stride=s)
+            for c, s in zip(self._out_channels, self.strides)
+        ]

+ 321 - 0
paddlers/models/ppdet/modeling/backbones/cspresnet.py

@@ -0,0 +1,321 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Constant
+
+from paddlers.models.ppdet.modeling.ops import get_act_fn
+from paddlers.models.ppdet.core.workspace import register, serializable
+from ..shape_spec import ShapeSpec
+
+__all__ = ['CSPResNet', 'BasicBlock', 'EffectiveSELayer', 'ConvBNLayer']
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size=3,
+                 stride=1,
+                 groups=1,
+                 padding=0,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+
+        self.conv = nn.Conv2D(
+            in_channels=ch_in,
+            out_channels=ch_out,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            bias_attr=False)
+
+        self.bn = nn.BatchNorm2D(
+            ch_out,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.act = get_act_fn(act) if act is None or isinstance(act, (
+            str, dict)) else act
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.act(x)
+
+        return x
+
+
+class RepVggBlock(nn.Layer):
+    def __init__(self, ch_in, ch_out, act='relu', alpha=False):
+        super(RepVggBlock, self).__init__()
+        self.ch_in = ch_in
+        self.ch_out = ch_out
+        self.conv1 = ConvBNLayer(
+            ch_in, ch_out, 3, stride=1, padding=1, act=None)
+        self.conv2 = ConvBNLayer(
+            ch_in, ch_out, 1, stride=1, padding=0, act=None)
+        self.act = get_act_fn(act) if act is None or isinstance(act, (
+            str, dict)) else act
+        if alpha:
+            self.alpha = self.create_parameter(
+                shape=[1],
+                attr=ParamAttr(initializer=Constant(value=1.)),
+                dtype="float32")
+        else:
+            self.alpha = None
+
+    def forward(self, x):
+        if hasattr(self, 'conv'):
+            y = self.conv(x)
+        else:
+            if self.alpha:
+                y = self.conv1(x) + self.alpha * self.conv2(x)
+            else:
+                y = self.conv1(x) + self.conv2(x)
+        y = self.act(y)
+        return y
+
+    def convert_to_deploy(self):
+        if not hasattr(self, 'conv'):
+            self.conv = nn.Conv2D(
+                in_channels=self.ch_in,
+                out_channels=self.ch_out,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                groups=1)
+        kernel, bias = self.get_equivalent_kernel_bias()
+        self.conv.weight.set_value(kernel)
+        self.conv.bias.set_value(bias)
+        self.__delattr__('conv1')
+        self.__delattr__('conv2')
+
+    def get_equivalent_kernel_bias(self):
+        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
+        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
+        if self.alpha:
+            return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor(
+                kernel1x1), bias3x3 + self.alpha * bias1x1
+        else:
+            return kernel3x3 + self._pad_1x1_to_3x3_tensor(
+                kernel1x1), bias3x3 + bias1x1
+
+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+        if kernel1x1 is None:
+            return 0
+        else:
+            return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
+
+    def _fuse_bn_tensor(self, branch):
+        if branch is None:
+            return 0, 0
+        kernel = branch.conv.weight
+        running_mean = branch.bn._mean
+        running_var = branch.bn._variance
+        gamma = branch.bn.weight
+        beta = branch.bn.bias
+        eps = branch.bn._epsilon
+        std = (running_var + eps).sqrt()
+        t = (gamma / std).reshape((-1, 1, 1, 1))
+        return kernel * t, beta - running_mean * gamma / std
+
+
+class BasicBlock(nn.Layer):
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 act='relu',
+                 shortcut=True,
+                 use_alpha=False):
+        super(BasicBlock, self).__init__()
+        assert ch_in == ch_out
+        self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=act)
+        self.conv2 = RepVggBlock(ch_out, ch_out, act=act, alpha=use_alpha)
+        self.shortcut = shortcut
+
+    def forward(self, x):
+        y = self.conv1(x)
+        y = self.conv2(y)
+        if self.shortcut:
+            return paddle.add(x, y)
+        else:
+            return y
+
+
+class EffectiveSELayer(nn.Layer):
+    """ Effective Squeeze-Excitation
+    From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
+    """
+
+    def __init__(self, channels, act='hardsigmoid'):
+        super(EffectiveSELayer, self).__init__()
+        self.fc = nn.Conv2D(channels, channels, kernel_size=1, padding=0)
+        self.act = get_act_fn(act) if act is None or isinstance(act, (
+            str, dict)) else act
+
+    def forward(self, x):
+        x_se = x.mean((2, 3), keepdim=True)
+        x_se = self.fc(x_se)
+        return x * self.act(x_se)
+
+
+class CSPResStage(nn.Layer):
+    def __init__(self,
+                 block_fn,
+                 ch_in,
+                 ch_out,
+                 n,
+                 stride,
+                 act='relu',
+                 attn='eca',
+                 use_alpha=False):
+        super(CSPResStage, self).__init__()
+
+        ch_mid = (ch_in + ch_out) // 2
+        if stride == 2:
+            self.conv_down = ConvBNLayer(
+                ch_in, ch_mid, 3, stride=2, padding=1, act=act)
+        else:
+            self.conv_down = None
+        self.conv1 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+        self.conv2 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+        self.blocks = nn.Sequential(*[
+            block_fn(
+                ch_mid // 2,
+                ch_mid // 2,
+                act=act,
+                shortcut=True,
+                use_alpha=use_alpha) for i in range(n)
+        ])
+        if attn:
+            self.attn = EffectiveSELayer(ch_mid, act='hardsigmoid')
+        else:
+            self.attn = None
+
+        self.conv3 = ConvBNLayer(ch_mid, ch_out, 1, act=act)
+
+    def forward(self, x):
+        if self.conv_down is not None:
+            x = self.conv_down(x)
+        y1 = self.conv1(x)
+        y2 = self.blocks(self.conv2(x))
+        y = paddle.concat([y1, y2], axis=1)
+        if self.attn is not None:
+            y = self.attn(y)
+        y = self.conv3(y)
+        return y
+
+
+@register
+@serializable
+class CSPResNet(nn.Layer):
+    __shared__ = ['width_mult', 'depth_mult', 'trt']
+
+    def __init__(self,
+                 layers=[3, 6, 6, 3],
+                 channels=[64, 128, 256, 512, 1024],
+                 act='swish',
+                 return_idx=[1, 2, 3],
+                 depth_wise=False,
+                 use_large_stem=False,
+                 width_mult=1.0,
+                 depth_mult=1.0,
+                 trt=False,
+                 use_checkpoint=False,
+                 use_alpha=False,
+                 **args):
+        super(CSPResNet, self).__init__()
+        self.use_checkpoint = use_checkpoint
+        channels = [max(round(c * width_mult), 1) for c in channels]
+        layers = [max(round(l * depth_mult), 1) for l in layers]
+        act = get_act_fn(
+            act, trt=trt) if act is None or isinstance(act,
+                                                       (str, dict)) else act
+
+        if use_large_stem:
+            self.stem = nn.Sequential(
+                ('conv1', ConvBNLayer(
+                    3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
+                ('conv2', ConvBNLayer(
+                    channels[0] // 2,
+                    channels[0] // 2,
+                    3,
+                    stride=1,
+                    padding=1,
+                    act=act)), ('conv3', ConvBNLayer(
+                        channels[0] // 2,
+                        channels[0],
+                        3,
+                        stride=1,
+                        padding=1,
+                        act=act)))
+        else:
+            self.stem = nn.Sequential(
+                ('conv1', ConvBNLayer(
+                    3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
+                ('conv2', ConvBNLayer(
+                    channels[0] // 2,
+                    channels[0],
+                    3,
+                    stride=1,
+                    padding=1,
+                    act=act)))
+
+        n = len(channels) - 1
+        self.stages = nn.Sequential(*[(str(i), CSPResStage(
+            BasicBlock,
+            channels[i],
+            channels[i + 1],
+            layers[i],
+            2,
+            act=act,
+            use_alpha=use_alpha)) for i in range(n)])
+
+        self._out_channels = channels[1:]
+        self._out_strides = [4 * 2**i for i in range(n)]
+        self.return_idx = return_idx
+        if use_checkpoint:
+            paddle.seed(0)
+
+    def forward(self, inputs):
+        x = inputs['image']
+        x = self.stem(x)
+        outs = []
+        for idx, stage in enumerate(self.stages):
+            if self.use_checkpoint and self.training:
+                x = paddle.distributed.fleet.utils.recompute(
+                    stage, x, **{"preserve_rng_state": True})
+            else:
+                x = stage(x)
+            if idx in self.return_idx:
+                outs.append(x)
+
+        return outs
+
+    @property
+    def out_shape(self):
+        return [
+            ShapeSpec(
+                channels=self._out_channels[i], stride=self._out_strides[i])
+            for i in self.return_idx
+        ]

+ 15 - 10
paddlers/models/ppdet/modeling/backbones/darknet.py

@@ -77,8 +77,8 @@ class ConvBNLayer(nn.Layer):
         out = self.batch_norm(out)
         out = self.batch_norm(out)
         if self.act == 'leaky':
         if self.act == 'leaky':
             out = F.leaky_relu(out, 0.1)
             out = F.leaky_relu(out, 0.1)
-        elif self.act == 'mish':
-            out = mish(out)
+        else:
+            out = getattr(F, self.act)(out)
         return out
         return out
 
 
 
 
@@ -149,9 +149,14 @@ class BasicBlock(nn.Layer):
 
 
         super(BasicBlock, self).__init__()
         super(BasicBlock, self).__init__()
 
 
+        assert ch_in == ch_out and (ch_in % 2) == 0, \
+            f"ch_in and ch_out should be the same even int, but the input \'ch_in is {ch_in}, \'ch_out is {ch_out}"
+        # Example with ch_in == ch_out == 10:
+        #   conv1 (1x1) halves the channels:   10 --> 5
+        #   conv2 (3x3) restores the channels:  5 --> 10
         self.conv1 = ConvBNLayer(
         self.conv1 = ConvBNLayer(
             ch_in=ch_in,
             ch_in=ch_in,
-            ch_out=ch_out,
+            ch_out=int(ch_out / 2),
             filter_size=1,
             filter_size=1,
             stride=1,
             stride=1,
             padding=0,
             padding=0,
@@ -160,8 +165,8 @@ class BasicBlock(nn.Layer):
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             data_format=data_format)
             data_format=data_format)
         self.conv2 = ConvBNLayer(
         self.conv2 = ConvBNLayer(
-            ch_in=ch_out,
-            ch_out=ch_out * 2,
+            ch_in=int(ch_out / 2),
+            ch_out=ch_out,
             filter_size=3,
             filter_size=3,
             stride=1,
             stride=1,
             padding=1,
             padding=1,
@@ -215,7 +220,7 @@ class Blocks(nn.Layer):
             res_out = self.add_sublayer(
             res_out = self.add_sublayer(
                 block_name,
                 block_name,
                 BasicBlock(
                 BasicBlock(
-                    ch_out * 2,
+                    ch_out,
                     ch_out,
                     ch_out,
                     norm_type=norm_type,
                     norm_type=norm_type,
                     norm_decay=norm_decay,
                     norm_decay=norm_decay,
@@ -296,7 +301,7 @@ class DarkNet(nn.Layer):
                 name,
                 name,
                 Blocks(
                 Blocks(
                     int(ch_in[i]),
                     int(ch_in[i]),
-                    32 * (2**i),
+                    int(ch_in[i]),
                     stage,
                     stage,
                     norm_type=norm_type,
                     norm_type=norm_type,
                     norm_decay=norm_decay,
                     norm_decay=norm_decay,
@@ -305,14 +310,14 @@ class DarkNet(nn.Layer):
                     name=name))
                     name=name))
             self.darknet_conv_block_list.append(conv_block)
             self.darknet_conv_block_list.append(conv_block)
             if i in return_idx:
             if i in return_idx:
-                self._out_channels.append(64 * (2**i))
+                self._out_channels.append(int(ch_in[i]))
         for i in range(num_stages - 1):
         for i in range(num_stages - 1):
             down_name = 'stage.{}.downsample'.format(i)
             down_name = 'stage.{}.downsample'.format(i)
             downsample = self.add_sublayer(
             downsample = self.add_sublayer(
                 down_name,
                 down_name,
                 DownSample(
                 DownSample(
-                    ch_in=32 * (2**(i + 1)),
-                    ch_out=32 * (2**(i + 2)),
+                    ch_in=int(ch_in[i]),
+                    ch_out=int(ch_in[i + 1]),
                     norm_type=norm_type,
                     norm_type=norm_type,
                     norm_decay=norm_decay,
                     norm_decay=norm_decay,
                     freeze_norm=freeze_norm,
                     freeze_norm=freeze_norm,

+ 12 - 12
paddlers/models/ppdet/modeling/backbones/dla.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import paddle
 import paddle

+ 2 - 2
paddlers/models/ppdet/modeling/backbones/esnet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
 from paddle.nn.initializer import KaimingNormal
 from paddle.nn.initializer import KaimingNormal
 from paddle.regularizer import L2Decay
 from paddle.regularizer import L2Decay
 
 

+ 12 - 12
paddlers/models/ppdet/modeling/backbones/ghostnet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -299,17 +299,17 @@ class GhostBottleneck(nn.Layer):
 class GhostNet(nn.Layer):
 class GhostNet(nn.Layer):
     __shared__ = ['norm_type']
     __shared__ = ['norm_type']
 
 
-    def __init__(self,
-                 scale=1.3,
-                 feature_maps=[6, 12, 15],
-                 with_extra_blocks=False,
-                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
-                                      [64, 128]],
-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
-                 conv_decay=0.,
-                 norm_type='bn',
-                 norm_decay=0.0,
-                 freeze_norm=False):
+    def __init__(
+            self,
+            scale=1.3,
+            feature_maps=[6, 12, 15],
+            with_extra_blocks=False,
+            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
+            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+            conv_decay=0.,
+            norm_type='bn',
+            norm_decay=0.0,
+            freeze_norm=False):
         super(GhostNet, self).__init__()
         super(GhostNet, self).__init__()
         if isinstance(feature_maps, Integral):
         if isinstance(feature_maps, Integral):
             feature_maps = [feature_maps]
             feature_maps = [feature_maps]

+ 4 - 2
paddlers/models/ppdet/modeling/backbones/hardnet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -146,7 +146,7 @@ class HarDBlock(nn.Layer):
 class HarDNet(nn.Layer):
 class HarDNet(nn.Layer):
     def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
     def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
         super(HarDNet, self).__init__()
         super(HarDNet, self).__init__()
-        assert arch in [39, 68, 85], "HarDNet-{} not support.".format(arch)
+        assert arch in [68, 85], "HarDNet-{} is not supported.".format(arch)
         if arch == 85:
         if arch == 85:
             first_ch = [48, 96]
             first_ch = [48, 96]
             second_kernel = 3
             second_kernel = 3
@@ -161,6 +161,8 @@ class HarDNet(nn.Layer):
             grmul = 1.7
             grmul = 1.7
             gr = [14, 16, 20, 40]
             gr = [14, 16, 20, 40]
             n_layers = [8, 16, 16, 16]
             n_layers = [8, 16, 16, 16]
+        else:
+            raise ValueError("HarDNet-{} is not supported.".format(arch))
 
 
         self.return_idx = return_idx
         self.return_idx = return_idx
         self._out_channels = [96, 214, 458, 784]
         self._out_channels = [96, 214, 458, 784]

+ 27 - 14
paddlers/models/ppdet/modeling/backbones/lcnet.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -68,7 +68,8 @@ class ConvBNLayer(nn.Layer):
                  filter_size,
                  filter_size,
                  num_filters,
                  num_filters,
                  stride,
                  stride,
-                 num_groups=1):
+                 num_groups=1,
+                 act='hard_swish'):
         super().__init__()
         super().__init__()
 
 
         self.conv = Conv2D(
         self.conv = Conv2D(
@@ -85,12 +86,15 @@ class ConvBNLayer(nn.Layer):
             num_filters,
             num_filters,
             weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
-        self.hardswish = nn.Hardswish()
+        if act == 'hard_swish':
+            self.act = nn.Hardswish()
+        elif act == 'relu6':
+            self.act = nn.ReLU6()
 
 
     def forward(self, x):
     def forward(self, x):
         x = self.conv(x)
         x = self.conv(x)
         x = self.bn(x)
         x = self.bn(x)
-        x = self.hardswish(x)
+        x = self.act(x)
         return x
         return x
 
 
 
 
@@ -100,7 +104,8 @@ class DepthwiseSeparable(nn.Layer):
                  num_filters,
                  num_filters,
                  stride,
                  stride,
                  dw_size=3,
                  dw_size=3,
-                 use_se=False):
+                 use_se=False,
+                 act='hard_swish'):
         super().__init__()
         super().__init__()
         self.use_se = use_se
         self.use_se = use_se
         self.dw_conv = ConvBNLayer(
         self.dw_conv = ConvBNLayer(
@@ -108,14 +113,16 @@ class DepthwiseSeparable(nn.Layer):
             num_filters=num_channels,
             num_filters=num_channels,
             filter_size=dw_size,
             filter_size=dw_size,
             stride=stride,
             stride=stride,
-            num_groups=num_channels)
+            num_groups=num_channels,
+            act=act)
         if use_se:
         if use_se:
             self.se = SEModule(num_channels)
             self.se = SEModule(num_channels)
         self.pw_conv = ConvBNLayer(
         self.pw_conv = ConvBNLayer(
             num_channels=num_channels,
             num_channels=num_channels,
             filter_size=1,
             filter_size=1,
             num_filters=num_filters,
             num_filters=num_filters,
-            stride=1)
+            stride=1,
+            act=act)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self.dw_conv(x)
         x = self.dw_conv(x)
@@ -158,7 +165,7 @@ class SEModule(nn.Layer):
 @register
 @register
 @serializable
 @serializable
 class LCNet(nn.Layer):
 class LCNet(nn.Layer):
-    def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
+    def __init__(self, scale=1.0, feature_maps=[3, 4, 5], act='hard_swish'):
         super().__init__()
         super().__init__()
         self.scale = scale
         self.scale = scale
         self.feature_maps = feature_maps
         self.feature_maps = feature_maps
@@ -169,7 +176,8 @@ class LCNet(nn.Layer):
             num_channels=3,
             num_channels=3,
             filter_size=3,
             filter_size=3,
             num_filters=make_divisible(16 * scale),
             num_filters=make_divisible(16 * scale),
-            stride=2)
+            stride=2,
+            act=act)
 
 
         self.blocks2 = nn.Sequential(*[
         self.blocks2 = nn.Sequential(*[
             DepthwiseSeparable(
             DepthwiseSeparable(
@@ -177,7 +185,8 @@ class LCNet(nn.Layer):
                 num_filters=make_divisible(out_c * scale),
                 num_filters=make_divisible(out_c * scale),
                 dw_size=k,
                 dw_size=k,
                 stride=s,
                 stride=s,
-                use_se=se)
+                use_se=se,
+                act=act)
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
         ])
         ])
 
 
@@ -187,7 +196,8 @@ class LCNet(nn.Layer):
                 num_filters=make_divisible(out_c * scale),
                 num_filters=make_divisible(out_c * scale),
                 dw_size=k,
                 dw_size=k,
                 stride=s,
                 stride=s,
-                use_se=se)
+                use_se=se,
+                act=act)
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
         ])
         ])
 
 
@@ -200,7 +210,8 @@ class LCNet(nn.Layer):
                 num_filters=make_divisible(out_c * scale),
                 num_filters=make_divisible(out_c * scale),
                 dw_size=k,
                 dw_size=k,
                 stride=s,
                 stride=s,
-                use_se=se)
+                use_se=se,
+                act=act)
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
         ])
         ])
 
 
@@ -213,7 +224,8 @@ class LCNet(nn.Layer):
                 num_filters=make_divisible(out_c * scale),
                 num_filters=make_divisible(out_c * scale),
                 dw_size=k,
                 dw_size=k,
                 stride=s,
                 stride=s,
-                use_se=se)
+                use_se=se,
+                act=act)
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
         ])
         ])
 
 
@@ -226,7 +238,8 @@ class LCNet(nn.Layer):
                 num_filters=make_divisible(out_c * scale),
                 num_filters=make_divisible(out_c * scale),
                 dw_size=k,
                 dw_size=k,
                 stride=s,
                 stride=s,
-                use_se=se)
+                use_se=se,
+                act=act)
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
         ])
         ])
 
 

+ 1 - 1
paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.

+ 14 - 14
paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # you may not use this file except in compliance with the License.
@@ -282,19 +282,19 @@ class ExtraBlockDW(nn.Layer):
 class MobileNetV3(nn.Layer):
 class MobileNetV3(nn.Layer):
     __shared__ = ['norm_type']
     __shared__ = ['norm_type']
 
 
-    def __init__(self,
-                 scale=1.0,
-                 model_name="large",
-                 feature_maps=[6, 12, 15],
-                 with_extra_blocks=False,
-                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
-                                      [64, 128]],
-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
-                 conv_decay=0.0,
-                 multiplier=1.0,
-                 norm_type='bn',
-                 norm_decay=0.0,
-                 freeze_norm=False):
+    def __init__(
+            self,
+            scale=1.0,
+            model_name="large",
+            feature_maps=[6, 12, 15],
+            with_extra_blocks=False,
+            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
+            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+            conv_decay=0.0,
+            multiplier=1.0,
+            norm_type='bn',
+            norm_decay=0.0,
+            freeze_norm=False):
         super(MobileNetV3, self).__init__()
         super(MobileNetV3, self).__init__()
         if isinstance(feature_maps, Integral):
         if isinstance(feature_maps, Integral):
             feature_maps = [feature_maps]
             feature_maps = [feature_maps]

+ 266 - 0
paddlers/models/ppdet/modeling/backbones/mobileone.py

@@ -0,0 +1,266 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is the Paddle implementation of the MobileOne block, see: https://arxiv.org/pdf/2206.04040.pdf.
+Some of the code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
+The copyright of DingXiaoH/RepVGG is as follows:
+MIT License [see LICENSE for details]
+"""
+
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Normal, Constant
+
+from paddlers.models.ppdet.modeling.ops import get_act_fn
+from paddlers.models.ppdet.modeling.layers import ConvNormLayer
+
+
+class MobileOneBlock(nn.Layer):
+    def __init__(
+            self,
+            ch_in,
+            ch_out,
+            stride,
+            kernel_size,
+            conv_num=1,
+            norm_type='bn',
+            norm_decay=0.,
+            norm_groups=32,
+            bias_on=False,
+            lr_scale=1.,
+            freeze_norm=False,
+            initializer=Normal(
+                mean=0., std=0.01),
+            skip_quant=False,
+            act='relu', ):
+        super(MobileOneBlock, self).__init__()
+
+        self.ch_in = ch_in
+        self.ch_out = ch_out
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = (kernel_size - 1) // 2
+        self.k = conv_num
+
+        self.depth_conv = nn.LayerList()
+        self.point_conv = nn.LayerList()
+        for _ in range(self.k):
+            self.depth_conv.append(
+                ConvNormLayer(
+                    ch_in,
+                    ch_in,
+                    kernel_size,
+                    stride=stride,
+                    groups=ch_in,
+                    norm_type=norm_type,
+                    norm_decay=norm_decay,
+                    norm_groups=norm_groups,
+                    bias_on=bias_on,
+                    lr_scale=lr_scale,
+                    freeze_norm=freeze_norm,
+                    initializer=initializer,
+                    skip_quant=skip_quant))
+            self.point_conv.append(
+                ConvNormLayer(
+                    ch_in,
+                    ch_out,
+                    1,
+                    stride=1,
+                    groups=1,
+                    norm_type=norm_type,
+                    norm_decay=norm_decay,
+                    norm_groups=norm_groups,
+                    bias_on=bias_on,
+                    lr_scale=lr_scale,
+                    freeze_norm=freeze_norm,
+                    initializer=initializer,
+                    skip_quant=skip_quant))
+        self.rbr_1x1 = ConvNormLayer(
+            ch_in,
+            ch_in,
+            1,
+            stride=self.stride,
+            groups=ch_in,
+            norm_type=norm_type,
+            norm_decay=norm_decay,
+            norm_groups=norm_groups,
+            bias_on=bias_on,
+            lr_scale=lr_scale,
+            freeze_norm=freeze_norm,
+            initializer=initializer,
+            skip_quant=skip_quant)
+        self.rbr_identity_st1 = nn.BatchNorm2D(
+            num_features=ch_in,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(
+                0.0))) if ch_in == ch_out and self.stride == 1 else None
+        self.rbr_identity_st2 = nn.BatchNorm2D(
+            num_features=ch_out,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(
+                0.0))) if ch_in == ch_out and self.stride == 1 else None
+        self.act = get_act_fn(act) if act is None or isinstance(act, (
+            str, dict)) else act
+
+    def forward(self, x):
+        if hasattr(self, "conv1") and hasattr(self, "conv2"):
+            y = self.act(self.conv2(self.act(self.conv1(x))))
+        else:
+            if self.rbr_identity_st1 is None:
+                id_out_st1 = 0
+            else:
+                id_out_st1 = self.rbr_identity_st1(x)
+
+            x1_1 = 0
+            for i in range(self.k):
+                x1_1 += self.depth_conv[i](x)
+
+            x1_2 = self.rbr_1x1(x)
+            x1 = self.act(x1_1 + x1_2 + id_out_st1)
+
+            if self.rbr_identity_st2 is None:
+                id_out_st2 = 0
+            else:
+                id_out_st2 = self.rbr_identity_st2(x1)
+
+            x2_1 = 0
+            for i in range(self.k):
+                x2_1 += self.point_conv[i](x1)
+            y = self.act(x2_1 + id_out_st2)
+
+        return y
+
+    def convert_to_deploy(self):
+        if not hasattr(self, 'conv1'):
+            self.conv1 = nn.Conv2D(
+                in_channels=self.ch_in,
+                out_channels=self.ch_in,
+                kernel_size=self.kernel_size,
+                stride=self.stride,
+                padding=self.padding,
+                groups=self.ch_in,
+                bias_attr=ParamAttr(
+                    initializer=Constant(value=0.), learning_rate=1.))
+        if not hasattr(self, 'conv2'):
+            self.conv2 = nn.Conv2D(
+                in_channels=self.ch_in,
+                out_channels=self.ch_out,
+                kernel_size=1,
+                stride=1,
+                padding='SAME',
+                groups=1,
+                bias_attr=ParamAttr(
+                    initializer=Constant(value=0.), learning_rate=1.))
+
+        conv1_kernel, conv1_bias, conv2_kernel, conv2_bias = self.get_equivalent_kernel_bias(
+        )
+        self.conv1.weight.set_value(conv1_kernel)
+        self.conv1.bias.set_value(conv1_bias)
+        self.conv2.weight.set_value(conv2_kernel)
+        self.conv2.bias.set_value(conv2_bias)
+        self.__delattr__('depth_conv')
+        self.__delattr__('point_conv')
+        self.__delattr__('rbr_1x1')
+        if hasattr(self, 'rbr_identity_st1'):
+            self.__delattr__('rbr_identity_st1')
+        if hasattr(self, 'rbr_identity_st2'):
+            self.__delattr__('rbr_identity_st2')
+
+    def get_equivalent_kernel_bias(self):
+        st1_kernel3x3, st1_bias3x3 = self._fuse_bn_tensor(self.depth_conv)
+        st1_kernel1x1, st1_bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
+        st1_kernelid, st1_biasid = self._fuse_bn_tensor(
+            self.rbr_identity_st1, kernel_size=self.kernel_size)
+
+        st2_kernel1x1, st2_bias1x1 = self._fuse_bn_tensor(self.point_conv)
+        st2_kernelid, st2_biasid = self._fuse_bn_tensor(
+            self.rbr_identity_st2, kernel_size=1)
+
+        conv1_kernel = st1_kernel3x3 + self._pad_1x1_to_3x3_tensor(
+            st1_kernel1x1) + st1_kernelid
+
+        conv1_bias = st1_bias3x3 + st1_bias1x1 + st1_biasid
+
+        conv2_kernel = st2_kernel1x1 + st2_kernelid
+        conv2_bias = st2_bias1x1 + st2_biasid
+
+        return conv1_kernel, conv1_bias, conv2_kernel, conv2_bias
+
+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+        if kernel1x1 is None:
+            return 0
+        else:
+            padding_size = (self.kernel_size - 1) // 2
+            return nn.functional.pad(
+                kernel1x1,
+                [padding_size, padding_size, padding_size, padding_size])
+
+    def _fuse_bn_tensor(self, branch, kernel_size=3):
+        if branch is None:
+            return 0, 0
+
+        if isinstance(branch, nn.LayerList):
+            fused_kernels = []
+            fused_bias = []
+            for block in branch:
+                kernel = block.conv.weight
+                running_mean = block.norm._mean
+                running_var = block.norm._variance
+                gamma = block.norm.weight
+                beta = block.norm.bias
+                eps = block.norm._epsilon
+
+                std = (running_var + eps).sqrt()
+                t = (gamma / std).reshape((-1, 1, 1, 1))
+
+                fused_kernels.append(kernel * t)
+                fused_bias.append(beta - running_mean * gamma / std)
+
+            return sum(fused_kernels), sum(fused_bias)
+
+        elif isinstance(branch, ConvNormLayer):
+            kernel = branch.conv.weight
+            running_mean = branch.norm._mean
+            running_var = branch.norm._variance
+            gamma = branch.norm.weight
+            beta = branch.norm.bias
+            eps = branch.norm._epsilon
+        else:
+            assert isinstance(branch, nn.BatchNorm2D)
+            input_dim = self.ch_in if kernel_size == 1 else 1
+            kernel_value = paddle.zeros(
+                shape=[self.ch_in, input_dim, kernel_size, kernel_size],
+                dtype='float32')
+            if kernel_size > 1:
+                for i in range(self.ch_in):
+                    kernel_value[i, i % input_dim, (kernel_size - 1) // 2, (
+                        kernel_size - 1) // 2] = 1
+            elif kernel_size == 1:
+                for i in range(self.ch_in):
+                    kernel_value[i, i % input_dim, 0, 0] = 1
+            else:
+                raise ValueError("Invalid kernel size recieved!")
+            kernel = paddle.to_tensor(kernel_value, place=branch.weight.place)
+            running_mean = branch._mean
+            running_var = branch._variance
+            gamma = branch.weight
+            beta = branch.bias
+            eps = branch._epsilon
+
+        std = (running_var + eps).sqrt()
+        t = (gamma / std).reshape((-1, 1, 1, 1))
+
+        return kernel * t, beta - running_mean * gamma / std

+ 14 - 14
paddlers/models/ppdet/modeling/backbones/resnet.py

@@ -1,15 +1,15 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 # limitations under the License.
 
 
 import math
 import math
@@ -446,13 +446,13 @@ class ResNet(nn.Layer):
                  std_senet=False):
                  std_senet=False):
         """
         """
         Residual Network, see https://arxiv.org/abs/1512.03385
         Residual Network, see https://arxiv.org/abs/1512.03385
-
+        
         Args:
         Args:
             depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
             depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
             ch_in (int): output channel of first stage, default 64
             ch_in (int): output channel of first stage, default 64
             variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
             variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
             lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
             lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
-                                 lower learning rate ratio is need for pretrained model
+                                 lower learning rate ratio is need for pretrained model 
                                  got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
                                  got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
             groups (int): group convolution cardinality
             groups (int): group convolution cardinality
             base_width (int): base width of each group convolution
             base_width (int): base width of each group convolution

+ 17 - 15
paddlers/models/ppdet/modeling/backbones/senet.py

@@ -1,21 +1,23 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
 # limitations under the License.
 
 import paddle.nn as nn
 
 from paddlers.models.ppdet.core.workspace import register, serializable
 from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
+from ..shape_spec import ShapeSpec
+from .name_adapter import NameAdapter
 
 __all__ = ['SENet', 'SERes5Head']
 
@@ -41,12 +43,12 @@ class SENet(ResNet):
                  num_stages=4):
         """
         Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507
-
+        
         Args:
             depth (int): SENet depth, should be 50, 101, 152
             variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
             lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
-                                 lower learning rate ratio is need for pretrained model
+                                 lower learning rate ratio is need for pretrained model 
                                  got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
             groups (int): group convolution cardinality
             base_width (int): base width of each group convolution
@@ -103,7 +105,7 @@ class SERes5Head(nn.Layer):
             norm_decay (float): weight decay for normalization layer weights
             dcn_v2_stages (list): index of stages who select deformable conv v2
             std_senet (bool): whether use senet, default True
-
+            
         """
         super(SERes5Head, self).__init__()
         ch_out = 512

+ 2 - 3
paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -188,11 +188,10 @@ class ShuffleNetV2(nn.Layer):
         elif scale == 1.5:
             stage_out_channels = [-1, 24, 176, 352, 704, 1024]
         elif scale == 2.0:
-            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+            stage_out_channels = [-1, 24, 244, 488, 976, 2048]
         else:
             raise NotImplementedError("This scale size:[" + str(scale) +
                                       "] is not implemented!")
-
         self._out_channels = []
         self._feature_idx = 0
         # 1. conv1

+ 22 - 71
paddlers/models/ppdet/modeling/backbones/swin_transformer.py

@@ -20,62 +20,13 @@ MIT License [see LICENSE for details]
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddle.nn.initializer import TruncatedNormal, Constant, Assign
 from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
 from paddlers.models.ppdet.core.workspace import register, serializable
 import numpy as np
 
-# Common initializations
-ones_ = Constant(value=1.)
-zeros_ = Constant(value=0.)
-trunc_normal_ = TruncatedNormal(std=.02)
-
-
-# Common Functions
-def to_2tuple(x):
-    return tuple([x] * 2)
-
-
-def add_parameter(layer, datas, name=None):
-    parameter = layer.create_parameter(
-        shape=(datas.shape), default_initializer=Assign(datas))
-    if name:
-        layer.add_parameter(name, parameter)
-    return parameter
-
-
-# Common Layers
-def drop_path(x, drop_prob=0., training=False):
-    """
-        Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
-        the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
-        See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
-    """
-    if drop_prob == 0. or not training:
-        return x
-    keep_prob = paddle.to_tensor(1 - drop_prob)
-    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
-    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
-    random_tensor = paddle.floor(random_tensor)  # binarize
-    output = x.divide(keep_prob) * random_tensor
-    return output
-
-
-class DropPath(nn.Layer):
-    def __init__(self, drop_prob=None):
-        super(DropPath, self).__init__()
-        self.drop_prob = drop_prob
-
-    def forward(self, x):
-        return drop_path(x, self.drop_prob, self.training)
-
-
-class Identity(nn.Layer):
-    def __init__(self):
-        super(Identity, self).__init__()
-
-    def forward(self, input):
-        return input
+from .transformer_utils import DropPath, Identity
+from .transformer_utils import add_parameter, to_2tuple
+from .transformer_utils import ones_, zeros_, trunc_normal_
 
 
 class Mlp(nn.Layer):
@@ -112,7 +63,7 @@ def window_partition(x, window_size):
     """
     B, H, W, C = x.shape
     x = x.reshape(
-        [B, H // window_size, window_size, W // window_size, window_size, C])
+        [-1, H // window_size, window_size, W // window_size, window_size, C])
     windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
         [-1, window_size, window_size, C])
     return windows
@@ -128,10 +79,11 @@ def window_reverse(windows, window_size, H, W):
     Returns:
         x: (B, H, W, C)
     """
+    _, _, _, C = windows.shape
     B = int(windows.shape[0] / (H * W / window_size / window_size))
     x = windows.reshape(
-        [B, H // window_size, W // window_size, window_size, window_size, -1])
-    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1])
+        [-1, H // window_size, W // window_size, window_size, window_size, C])
+    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
     return x
 
 
@@ -206,14 +158,14 @@ class WindowAttention(nn.Layer):
         """
         B_, N, C = x.shape
         qkv = self.qkv(x).reshape(
-            [B_, N, 3, self.num_heads, C // self.num_heads]).transpose(
+            [-1, N, 3, self.num_heads, C // self.num_heads]).transpose(
                 [2, 0, 3, 1, 4])
         q, k, v = qkv[0], qkv[1], qkv[2]
 
         q = q * self.scale
         attn = paddle.mm(q, k.transpose([0, 1, 3, 2]))
 
-        index = self.relative_position_index.reshape([-1])
+        index = self.relative_position_index.flatten()
 
         relative_position_bias = paddle.index_select(
             self.relative_position_bias_table, index)
@@ -227,7 +179,7 @@ class WindowAttention(nn.Layer):
 
         if mask is not None:
             nW = mask.shape[0]
-            attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N
+            attn = attn.reshape([-1, nW, self.num_heads, N, N
                                  ]) + mask.unsqueeze(1).unsqueeze(0)
             attn = attn.reshape([-1, self.num_heads, N, N])
             attn = self.softmax(attn)
@@ -237,7 +189,7 @@ class WindowAttention(nn.Layer):
         attn = self.attn_drop(attn)
 
         # x = (attn @ v).transpose(1, 2).reshape([B_, N, C])
-        x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([B_, N, C])
+        x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([-1, N, C])
         x = self.proj(x)
         x = self.proj_drop(x)
         return x
@@ -315,7 +267,7 @@ class SwinTransformerBlock(nn.Layer):
 
         shortcut = x
         x = self.norm1(x)
-        x = x.reshape([B, H, W, C])
+        x = x.reshape([-1, H, W, C])
 
         # pad feature maps to multiples of window size
         pad_l = pad_t = 0
@@ -337,7 +289,7 @@ class SwinTransformerBlock(nn.Layer):
         x_windows = window_partition(
             shifted_x, self.window_size)  # nW*B, window_size, window_size, C
         x_windows = x_windows.reshape(
-            [-1, self.window_size * self.window_size,
+            [x_windows.shape[0], self.window_size * self.window_size,
              C])  # nW*B, window_size*window_size, C
 
         # W-MSA/SW-MSA
@@ -346,7 +298,7 @@ class SwinTransformerBlock(nn.Layer):
 
         # merge windows
         attn_windows = attn_windows.reshape(
-            [-1, self.window_size, self.window_size, C])
+            [x_windows.shape[0], self.window_size, self.window_size, C])
         shifted_x = window_reverse(attn_windows, self.window_size, Hp,
                                    Wp)  # B H' W' C
 
@@ -362,7 +314,7 @@ class SwinTransformerBlock(nn.Layer):
         if pad_r > 0 or pad_b > 0:
             x = x[:, :H, :W, :]
 
-        x = x.reshape([B, H * W, C])
+        x = x.reshape([-1, H * W, C])
 
         # FFN
         x = shortcut + self.drop_path(x)
@@ -393,7 +345,7 @@ class PatchMerging(nn.Layer):
         B, L, C = x.shape
         assert L == H * W, "input feature has wrong size"
 
-        x = x.reshape([B, H, W, C])
+        x = x.reshape([-1, H, W, C])
 
         # padding
         pad_input = (H % 2 == 1) or (W % 2 == 1)
@@ -405,7 +357,7 @@ class PatchMerging(nn.Layer):
         x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
         x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
         x = paddle.concat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
-        x = x.reshape([B, H * W // 4, 4 * C])  # B H/2*W/2 4*C
+        x = x.reshape([-1, H * W // 4, 4 * C])  # B H/2*W/2 4*C
 
         x = self.norm(x)
         x = self.reduction(x)
@@ -482,8 +434,7 @@ class BasicLayer(nn.Layer):
         # calculate attention mask for SW-MSA
         Hp = int(np.ceil(H / self.window_size)) * self.window_size
         Wp = int(np.ceil(W / self.window_size)) * self.window_size
-        img_mask = paddle.fluid.layers.zeros(
-            [1, Hp, Wp, 1], dtype='float32')  # 1 Hp Wp 1
+        img_mask = paddle.zeros([1, Hp, Wp, 1], dtype='float32')  # 1 Hp Wp 1
         h_slices = (slice(0, -self.window_size),
                     slice(-self.window_size, -self.shift_size),
                     slice(-self.shift_size, None))
@@ -688,10 +639,10 @@ class SwinTransformer(nn.Layer):
         if self.frozen_stages >= 0:
             self.patch_embed.eval()
             for param in self.patch_embed.parameters():
-                param.requires_grad = False
+                param.stop_gradient = True
 
         if self.frozen_stages >= 1 and self.ape:
-            self.absolute_pos_embed.requires_grad = False
+            self.absolute_pos_embed.stop_gradient = True
 
         if self.frozen_stages >= 2:
             self.pos_drop.eval()
@@ -699,7 +650,7 @@ class SwinTransformer(nn.Layer):
                 m = self.layers[i]
                 m.eval()
                 for param in m.parameters():
-                    param.requires_grad = False
+                    param.stop_gradient = True
 
     def _init_weights(self, m):
         if isinstance(m, nn.Linear):
@@ -713,7 +664,7 @@ class SwinTransformer(nn.Layer):
     def forward(self, x):
         """Forward function."""
         x = self.patch_embed(x['image'])
-        _, _, Wh, Ww = x.shape
+        B, _, Wh, Ww = x.shape
         if self.ape:
             # interpolate the position embedding to the corresponding size
             absolute_pos_embed = F.interpolate(

+ 74 - 0
paddlers/models/ppdet/modeling/backbones/transformer_utils.py

@@ -0,0 +1,74 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+
+from paddle.nn.initializer import TruncatedNormal, Constant, Assign
+
+# Common initializations
+ones_ = Constant(value=1.)
+zeros_ = Constant(value=0.)
+trunc_normal_ = TruncatedNormal(std=.02)
+
+
+# Common Layers
+def drop_path(x, drop_prob=0., training=False):
+    """
+        Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+        the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+        See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = paddle.to_tensor(1 - drop_prob)
+    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
+    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
+    random_tensor = paddle.floor(random_tensor)  # binarize
+    output = x.divide(keep_prob) * random_tensor
+    return output
+
+
+class DropPath(nn.Layer):
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+
+
+class Identity(nn.Layer):
+    def __init__(self):
+        super(Identity, self).__init__()
+
+    def forward(self, input):
+        return input
+
+
+# common funcs
+
+
+def to_2tuple(x):
+    if isinstance(x, (list, tuple)):
+        return x
+    return tuple([x] * 2)
+
+
+def add_parameter(layer, datas, name=None):
+    parameter = layer.create_parameter(
+        shape=(datas.shape), default_initializer=Assign(datas))
+    if name:
+        layer.add_parameter(name, parameter)
+    return parameter

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio