пре 3 година · a4957b21be
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -26,16 +26,16 @@ jobs:
 
				         include:
			
 
				           - python-version: "3.7"
			
 
				             os: windows-latest
			
 
				-            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
			
 
				+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
			
 
				           - python-version: "3.7"
			
 
				             os: ubuntu-latest
			
 
				-            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
			
 
				+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
			
 
				           - python-version: "3.8"
			
 
				             os: windows-latest
			
 
				-            gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl
			
 
				+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.3.3-cp38-cp38-win_amd64.whl
			
 
				           - python-version: "3.8"
			
 
				             os: ubuntu-latest
			
 
				-            gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
			
 
				+            gdal-whl-url: https://paddlers.bj.bcebos.com/dependencies/gdal/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
			
 
				       fail-fast: false
			
 
				     steps:
			
 
				       - uses: actions/checkout@v3
			
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ PaddleRS具有以下五大特色：
 
				 * 如果您发现任何PaddleRS存在的问题或是对PaddleRS有建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleRS/issues)向我们提出。
			
 
				 * 欢迎加入PaddleRS微信群：
			
 
				 <div align="center">
			
 
				-<img src="https://user-images.githubusercontent.com/21275753/199192024-79373ad7-917f-4a7c-9de2-010a4d0c0152.png"  width = "150" />  
			
 
				+<img src="https://user-images.githubusercontent.com/21275753/200470530-a3321f5b-fa8e-4330-84fa-b76cb3df873a.png"  width = "150" />  
			
 
				 </div>
			
 
				 
			
 
				 ## <img src="./docs/images/model.png" width="30"/> 产品矩阵
			
--- a/docs/images/whole_picture.png
+++ b/docs/images/whole_picture.png
--- a/paddlers/models/hash.txt
+++ b/paddlers/models/hash.txt
@@ -1 +1,2 @@
 
				+ppdet ba2aad26e6bc1e5c2dad76ca96692a0d63eccfac
			
 
				 ppseg f6c73b478cdf00f40ae69edd35bf6bce2a1687ef
			
--- a/paddlers/models/ppdet/core/workspace.py
+++ b/paddlers/models/ppdet/core/workspace.py
@@ -210,9 +210,17 @@ def create(cls_or_name, **kwargs):
 
				     assert type(cls_or_name) in [type, str
			
 
				                                  ], "should be a class or name of a class"
			
 
				     name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
			
 
				-    assert name in global_config and \
			
 
				-        isinstance(global_config[name], SchemaDict), \
			
 
				-        "the module {} is not registered".format(name)
			
 
				+    if name in global_config:
			
 
				+        if isinstance(global_config[name], SchemaDict):
			
 
				+            pass
			
 
				+        elif hasattr(global_config[name], "__dict__"):
			
 
				+            # support instance return directly
			
 
				+            return global_config[name]
			
 
				+        else:
			
 
				+            raise ValueError("The module {} is not registered".format(name))
			
 
				+    else:
			
 
				+        raise ValueError("The module {} is not registered".format(name))
			
 
				+
			
 
				     config = global_config[name]
			
 
				     cls = getattr(config.pymodule, name)
			
 
				     cls_kwargs = {}
			
--- a/paddlers/models/ppdet/data/__init__.py
+++ b/paddlers/models/ppdet/data/__init__.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from . import source
			
--- a/paddlers/models/ppdet/data/crop_utils/__init__.py
+++ b/paddlers/models/ppdet/data/crop_utils/__init__.py
@@ -10,4 +10,4 @@
 
				 # distributed under the License is distributed on an "AS IS" BASIS,
			
 
				 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				 # See the License for the specific language governing permissions and
			
 
				-# limitations under the License.
			
 
				+# limitations under the License.
			
--- a/paddlers/models/ppdet/data/reader.py
+++ b/paddlers/models/ppdet/data/reader.py
@@ -23,7 +23,7 @@ else:
 
				 import numpy as np
			
 
				 
			
 
				 from paddle.io import DataLoader, DistributedBatchSampler
			
 
				-from paddle.fluid.dataloader.collate import default_collate_fn
			
 
				+from .utils import default_collate_fn
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import register
			
 
				 from . import transform
			
@@ -118,7 +118,7 @@ class BaseDataLoader(object):
 
				         collate_batch (bool): whether to collate batch in dataloader.
			
 
				             If set to True, the samples will collate into batch according
			
 
				             to the batch size. Otherwise, the ground-truth will not collate,
			
 
				-            which is used when the number of ground-truch is different in
			
 
				+            which is used when the number of ground-truch is different in 
			
 
				             samples.
			
 
				         use_shared_memory (bool): whether to use shared memory to
			
 
				                 accelerate data loading, enable this only if you
			
@@ -144,7 +144,7 @@ class BaseDataLoader(object):
 
				         self._sample_transforms = Compose(
			
 
				             sample_transforms, num_classes=num_classes)
			
 
				 
			
 
				-        # batch transfrom
			
 
				+        # batch transfrom 
			
 
				         self._batch_transforms = BatchCompose(batch_transforms, num_classes,
			
 
				                                               collate_batch)
			
 
				         self.batch_size = batch_size
			
--- a/paddlers/models/ppdet/data/shm_utils.py
+++ b/paddlers/models/ppdet/data/shm_utils.py
@@ -34,7 +34,10 @@ SHM_DEFAULT_MOUNT = '/dev/shm'
 
				 
			
 
				 
			
 
				 def _parse_size_in_M(size_str):
			
 
				-    num, unit = size_str[:-1], size_str[-1]
			
 
				+    if size_str[-1] == 'B':
			
 
				+        num, unit = size_str[:-2], size_str[-2]
			
 
				+    else:
			
 
				+        num, unit = size_str[:-1], size_str[-1]
			
 
				     assert unit in SIZE_UNIT, \
			
 
				             "unknown shm size unit {}".format(unit)
			
 
				     return float(num) * \
			
--- a/paddlers/models/ppdet/data/source/__init__.py
+++ b/paddlers/models/ppdet/data/source/__init__.py
@@ -27,3 +27,4 @@ from .category import *
 
				 from .keypoint_coco import *
			
 
				 from .mot import *
			
 
				 from .sniper_coco import SniperCOCODataSet
			
 
				+from .dataset import ImageFolder
			
--- a/paddlers/models/ppdet/data/source/category.py
+++ b/paddlers/models/ppdet/data/source/category.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -39,24 +39,49 @@ def get_categories(metric_type, anno_file=None, arch=None):
 
				     if arch == 'keypoint_arch':
			
 
				         return (None, {'id': 'keypoint'})
			
 
				 
			
 
				+    if anno_file == None or (not os.path.isfile(anno_file)):
			
 
				+        logger.warning(
			
 
				+            "anno_file '{}' is None or not set or not exist, "
			
 
				+            "please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
			
 
				+            "otherwise the default categories will be used by metric_type.".
			
 
				+            format(anno_file))
			
 
				+
			
 
				     if metric_type.lower() == 'coco' or metric_type.lower(
			
 
				     ) == 'rbox' or metric_type.lower() == 'snipercoco':
			
 
				         if anno_file and os.path.isfile(anno_file):
			
 
				-            # lazy import pycocotools here
			
 
				-            from pycocotools.coco import COCO
			
 
				-
			
 
				-            coco = COCO(anno_file)
			
 
				-            cats = coco.loadCats(coco.getCatIds())
			
 
				-
			
 
				-            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
			
 
				-            catid2name = {cat['id']: cat['name'] for cat in cats}
			
 
				+            if anno_file.endswith('json'):
			
 
				+                # lazy import pycocotools here
			
 
				+                from pycocotools.coco import COCO
			
 
				+                coco = COCO(anno_file)
			
 
				+                cats = coco.loadCats(coco.getCatIds())
			
 
				+
			
 
				+                clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
			
 
				+                catid2name = {cat['id']: cat['name'] for cat in cats}
			
 
				+
			
 
				+            elif anno_file.endswith('txt'):
			
 
				+                cats = []
			
 
				+                with open(anno_file) as f:
			
 
				+                    for line in f.readlines():
			
 
				+                        cats.append(line.strip())
			
 
				+                if cats[0] == 'background': cats = cats[1:]
			
 
				+
			
 
				+                clsid2catid = {i: i for i in range(len(cats))}
			
 
				+                catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+
			
 
				+            else:
			
 
				+                raise ValueError("anno_file {} should be json or txt.".format(
			
 
				+                    anno_file))
			
 
				             return clsid2catid, catid2name
			
 
				 
			
 
				         # anno file not exist, load default categories of COCO17
			
 
				         else:
			
 
				             if metric_type.lower() == 'rbox':
			
 
				+                logger.warning(
			
 
				+                    "metric_type: {}, load default categories of DOTA.".format(
			
 
				+                        metric_type))
			
 
				                 return _dota_category()
			
 
				-
			
 
				+            logger.warning("metric_type: {}, load default categories of COCO.".
			
 
				+                           format(metric_type))
			
 
				             return _coco17_category()
			
 
				 
			
 
				     elif metric_type.lower() == 'voc':
			
@@ -77,6 +102,8 @@ def get_categories(metric_type, anno_file=None, arch=None):
 
				         # anno file not exist, load default categories of
			
 
				         # VOC all 20 categories
			
 
				         else:
			
 
				+            logger.warning("metric_type: {}, load default categories of VOC.".
			
 
				+                           format(metric_type))
			
 
				             return _vocall_category()
			
 
				 
			
 
				     elif metric_type.lower() == 'oid':
			
@@ -104,6 +131,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
 
				             return clsid2catid, catid2name
			
 
				         # anno file not exist, load default category 'pedestrian'.
			
 
				         else:
			
 
				+            logger.warning(
			
 
				+                "metric_type: {}, load default categories of pedestrian MOT.".
			
 
				+                format(metric_type))
			
 
				             return _mot_category(category='pedestrian')
			
 
				 
			
 
				     elif metric_type.lower() in ['kitti', 'bdd100kmot']:
			
@@ -122,6 +152,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
 
				             return clsid2catid, catid2name
			
 
				         # anno file not exist, load default categories of visdrone all 10 categories
			
 
				         else:
			
 
				+            logger.warning(
			
 
				+                "metric_type: {}, load default categories of VisDrone.".format(
			
 
				+                    metric_type))
			
 
				             return _visdrone_category()
			
 
				 
			
 
				     else:
			
--- a/paddlers/models/ppdet/data/source/coco.py
+++ b/paddlers/models/ppdet/data/source/coco.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import os
			
@@ -33,12 +33,13 @@ class COCODataSet(DetDataset):
 
				         anno_path (str): coco annotation file path.
			
 
				         data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				         sample_num (int): number of samples to load, -1 means all.
			
 
				-        load_crowd (bool): whether to load crowded ground-truth.
			
 
				+        load_crowd (bool): whether to load crowded ground-truth. 
			
 
				             False as default
			
 
				         allow_empty (bool): whether to load empty entry. False as default
			
 
				-        empty_ratio (float): the ratio of empty record number to total
			
 
				-            record's, if empty_ratio is out of [0. ,1.), do not sample the
			
 
				+        empty_ratio (float): the ratio of empty record number to total 
			
 
				+            record's, if empty_ratio is out of [0. ,1.), do not sample the 
			
 
				             records and use all the empty entries. 1. as default
			
 
				+        repeat (int): repeat times for dataset, use in benchmark.
			
 
				     """
			
 
				 
			
 
				     def __init__(self,
			
@@ -49,9 +50,15 @@ class COCODataSet(DetDataset):
 
				                  sample_num=-1,
			
 
				                  load_crowd=False,
			
 
				                  allow_empty=False,
			
 
				-                 empty_ratio=1.):
			
 
				-        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
			
 
				-                                          data_fields, sample_num)
			
 
				+                 empty_ratio=1.,
			
 
				+                 repeat=1):
			
 
				+        super(COCODataSet, self).__init__(
			
 
				+            dataset_dir,
			
 
				+            image_dir,
			
 
				+            anno_path,
			
 
				+            data_fields,
			
 
				+            sample_num,
			
 
				+            repeat=repeat)
			
 
				         self.load_image_only = False
			
 
				         self.load_semantic = False
			
 
				         self.load_crowd = load_crowd
			
@@ -138,25 +145,14 @@ class COCODataSet(DetDataset):
 
				                         if not any(np.array(inst['bbox'])):
			
 
				                             continue
			
 
				 
			
 
				-                    # read rbox anno or not
			
 
				-                    is_rbox_anno = True if len(inst['bbox']) == 5 else False
			
 
				-                    if is_rbox_anno:
			
 
				-                        xc, yc, box_w, box_h, angle = inst['bbox']
			
 
				-                        x1 = xc - box_w / 2.0
			
 
				-                        y1 = yc - box_h / 2.0
			
 
				-                        x2 = x1 + box_w
			
 
				-                        y2 = y1 + box_h
			
 
				-                    else:
			
 
				-                        x1, y1, box_w, box_h = inst['bbox']
			
 
				-                        x2 = x1 + box_w
			
 
				-                        y2 = y1 + box_h
			
 
				+                    x1, y1, box_w, box_h = inst['bbox']
			
 
				+                    x2 = x1 + box_w
			
 
				+                    y2 = y1 + box_h
			
 
				                     eps = 1e-5
			
 
				                     if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
			
 
				                         inst['clean_bbox'] = [
			
 
				                             round(float(x), 3) for x in [x1, y1, x2, y2]
			
 
				                         ]
			
 
				-                        if is_rbox_anno:
			
 
				-                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
			
 
				                         bboxes.append(inst)
			
 
				                     else:
			
 
				                         logger.warning(
			
@@ -171,9 +167,6 @@ class COCODataSet(DetDataset):
 
				                     is_empty = True
			
 
				 
			
 
				                 gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
			
 
				-                if is_rbox_anno:
			
 
				-                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
			
 
				-                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				                 gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				                 is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				                 gt_poly = [None] * num_bbox
			
@@ -183,13 +176,10 @@ class COCODataSet(DetDataset):
 
				                     catid = box['category_id']
			
 
				                     gt_class[i][0] = self.catid2clsid[catid]
			
 
				                     gt_bbox[i, :] = box['clean_bbox']
			
 
				-                    # xc, yc, w, h, theta
			
 
				-                    if is_rbox_anno:
			
 
				-                        gt_rbox[i, :] = box['clean_rbox']
			
 
				                     is_crowd[i][0] = box['iscrowd']
			
 
				-                    # check RLE format
			
 
				+                    # check RLE format 
			
 
				                     if 'segmentation' in box and box['iscrowd'] == 1:
			
 
				-                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
			
 
				+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
			
 
				                     elif 'segmentation' in box and box['segmentation']:
			
 
				                         if not np.array(box['segmentation']
			
 
				                                         ).size > 0 and not self.allow_empty:
			
@@ -206,21 +196,12 @@ class COCODataSet(DetDataset):
 
				                         gt_poly) and not self.allow_empty:
			
 
				                     continue
			
 
				 
			
 
				-                if is_rbox_anno:
			
 
				-                    gt_rec = {
			
 
				-                        'is_crowd': is_crowd,
			
 
				-                        'gt_class': gt_class,
			
 
				-                        'gt_bbox': gt_bbox,
			
 
				-                        'gt_rbox': gt_rbox,
			
 
				-                        'gt_poly': gt_poly,
			
 
				-                    }
			
 
				-                else:
			
 
				-                    gt_rec = {
			
 
				-                        'is_crowd': is_crowd,
			
 
				-                        'gt_class': gt_class,
			
 
				-                        'gt_bbox': gt_bbox,
			
 
				-                        'gt_poly': gt_poly,
			
 
				-                    }
			
 
				+                gt_rec = {
			
 
				+                    'is_crowd': is_crowd,
			
 
				+                    'gt_class': gt_class,
			
 
				+                    'gt_bbox': gt_bbox,
			
 
				+                    'gt_poly': gt_poly,
			
 
				+                }
			
 
				 
			
 
				                 for k, v in gt_rec.items():
			
 
				                     if k in self.data_fields:
			
@@ -247,3 +228,126 @@ class COCODataSet(DetDataset):
 
				             empty_records = self._sample_empty(empty_records, len(records))
			
 
				             records += empty_records
			
 
				         self.roidbs = records
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class SlicedCOCODataSet(COCODataSet):
			
 
				+    """Sliced COCODataSet"""
			
 
				+
			
 
				+    def __init__(
			
 
				+            self,
			
 
				+            dataset_dir=None,
			
 
				+            image_dir=None,
			
 
				+            anno_path=None,
			
 
				+            data_fields=['image'],
			
 
				+            sample_num=-1,
			
 
				+            load_crowd=False,
			
 
				+            allow_empty=False,
			
 
				+            empty_ratio=1.,
			
 
				+            repeat=1,
			
 
				+            sliced_size=[640, 640],
			
 
				+            overlap_ratio=[0.25, 0.25], ):
			
 
				+        super(SlicedCOCODataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            image_dir=image_dir,
			
 
				+            anno_path=anno_path,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num,
			
 
				+            load_crowd=load_crowd,
			
 
				+            allow_empty=allow_empty,
			
 
				+            empty_ratio=empty_ratio,
			
 
				+            repeat=repeat, )
			
 
				+        self.sliced_size = sliced_size
			
 
				+        self.overlap_ratio = overlap_ratio
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
			
 
				+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
			
 
				+
			
 
				+        assert anno_path.endswith('.json'), \
			
 
				+            'invalid coco annotation file: ' + anno_path
			
 
				+        from pycocotools.coco import COCO
			
 
				+        coco = COCO(anno_path)
			
 
				+        img_ids = coco.getImgIds()
			
 
				+        img_ids.sort()
			
 
				+        cat_ids = coco.getCatIds()
			
 
				+        records = []
			
 
				+        empty_records = []
			
 
				+        ct = 0
			
 
				+        ct_sub = 0
			
 
				+
			
 
				+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
			
 
				+        self.cname2cid = dict({
			
 
				+            coco.loadCats(catid)[0]['name']: clsid
			
 
				+            for catid, clsid in self.catid2clsid.items()
			
 
				+        })
			
 
				+
			
 
				+        if 'annotations' not in coco.dataset:
			
 
				+            self.load_image_only = True
			
 
				+            logger.warning('Annotation file: {} does not contains ground truth '
			
 
				+                           'and load image information only.'.format(anno_path))
			
 
				+        try:
			
 
				+            import sahi
			
 
				+            from sahi.slicing import slice_image
			
 
				+        except Exception as e:
			
 
				+            logger.error(
			
 
				+                'sahi not found, plaese install sahi. '
			
 
				+                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
			
 
				+            )
			
 
				+            raise e
			
 
				+
			
 
				+        sub_img_ids = 0
			
 
				+        for img_id in img_ids:
			
 
				+            img_anno = coco.loadImgs([img_id])[0]
			
 
				+            im_fname = img_anno['file_name']
			
 
				+            im_w = float(img_anno['width'])
			
 
				+            im_h = float(img_anno['height'])
			
 
				+
			
 
				+            im_path = os.path.join(image_dir,
			
 
				+                                   im_fname) if image_dir else im_fname
			
 
				+            is_empty = False
			
 
				+            if not os.path.exists(im_path):
			
 
				+                logger.warning('Illegal image file: {}, and it will be '
			
 
				+                               'ignored'.format(im_path))
			
 
				+                continue
			
 
				+
			
 
				+            if im_w < 0 or im_h < 0:
			
 
				+                logger.warning('Illegal width: {} or height: {} in annotation, '
			
 
				+                               'and im_id: {} will be ignored'.format(
			
 
				+                                   im_w, im_h, img_id))
			
 
				+                continue
			
 
				+
			
 
				+            slice_image_result = sahi.slicing.slice_image(
			
 
				+                image=im_path,
			
 
				+                slice_height=self.sliced_size[0],
			
 
				+                slice_width=self.sliced_size[1],
			
 
				+                overlap_height_ratio=self.overlap_ratio[0],
			
 
				+                overlap_width_ratio=self.overlap_ratio[1])
			
 
				+
			
 
				+            sub_img_num = len(slice_image_result)
			
 
				+            for _ind in range(sub_img_num):
			
 
				+                im = slice_image_result.images[_ind]
			
 
				+                coco_rec = {
			
 
				+                    'image': im,
			
 
				+                    'im_id': np.array([sub_img_ids + _ind]),
			
 
				+                    'h': im.shape[0],
			
 
				+                    'w': im.shape[1],
			
 
				+                    'ori_im_id': np.array([img_id]),
			
 
				+                    'st_pix': np.array(
			
 
				+                        slice_image_result.starting_pixels[_ind],
			
 
				+                        dtype=np.float32),
			
 
				+                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
			
 
				+                } if 'image' in self.data_fields else {}
			
 
				+                records.append(coco_rec)
			
 
				+            ct_sub += sub_img_num
			
 
				+            ct += 1
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
			
 
				+        logger.info('{} samples and slice to {} sub_samples in file {}'.format(
			
 
				+            ct, ct_sub, anno_path))
			
 
				+        if self.allow_empty and len(empty_records) > 0:
			
 
				+            empty_records = self._sample_empty(empty_records, len(records))
			
 
				+            records += empty_records
			
 
				+        self.roidbs = records
			
--- a/paddlers/models/ppdet/data/source/dataset.py
+++ b/paddlers/models/ppdet/data/source/dataset.py
@@ -1,20 +1,20 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import os
			
 
				+import copy
			
 
				 import numpy as np
			
 
				-
			
 
				 try:
			
 
				     from collections.abc import Sequence
			
 
				 except Exception:
			
@@ -22,7 +22,10 @@ except Exception:
 
				 from paddle.io import Dataset
			
 
				 from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				 from paddlers.models.ppdet.utils.download import get_dataset_path
			
 
				-import copy
			
 
				+from paddlers.models.ppdet.data import source
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				 
			
 
				 
			
 
				 @serializable
			
@@ -37,6 +40,7 @@ class DetDataset(Dataset):
 
				         data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				         sample_num (int): number of samples to load, -1 means all.
			
 
				         use_default_label (bool): whether to load default label list.
			
 
				+        repeat (int): repeat times for dataset, use in benchmark.
			
 
				     """
			
 
				 
			
 
				     def __init__(self,
			
@@ -46,6 +50,7 @@ class DetDataset(Dataset):
 
				                  data_fields=['image'],
			
 
				                  sample_num=-1,
			
 
				                  use_default_label=None,
			
 
				+                 repeat=1,
			
 
				                  **kwargs):
			
 
				         super(DetDataset, self).__init__()
			
 
				         self.dataset_dir = dataset_dir if dataset_dir is not None else ''
			
@@ -54,28 +59,32 @@ class DetDataset(Dataset):
 
				         self.data_fields = data_fields
			
 
				         self.sample_num = sample_num
			
 
				         self.use_default_label = use_default_label
			
 
				+        self.repeat = repeat
			
 
				         self._epoch = 0
			
 
				         self._curr_iter = 0
			
 
				 
			
 
				     def __len__(self, ):
			
 
				-        return len(self.roidbs)
			
 
				+        return len(self.roidbs) * self.repeat
			
 
				+
			
 
				+    def __call__(self, *args, **kwargs):
			
 
				+        return self
			
 
				 
			
 
				     def __getitem__(self, idx):
			
 
				+        n = len(self.roidbs)
			
 
				+        if self.repeat > 1:
			
 
				+            idx %= n
			
 
				         # data batch
			
 
				         roidb = copy.deepcopy(self.roidbs[idx])
			
 
				         if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
			
 
				-            n = len(self.roidbs)
			
 
				             idx = np.random.randint(n)
			
 
				             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
			
 
				         elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
			
 
				-            n = len(self.roidbs)
			
 
				             idx = np.random.randint(n)
			
 
				             roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
			
 
				         elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
			
 
				-            n = len(self.roidbs)
			
 
				             roidb = [roidb, ] + [
			
 
				                 copy.deepcopy(self.roidbs[np.random.randint(n)])
			
 
				-                for _ in range(3)
			
 
				+                for _ in range(4)
			
 
				             ]
			
 
				         if isinstance(roidb, Sequence):
			
 
				             for r in roidb:
			
@@ -149,12 +158,15 @@ class ImageFolder(DetDataset):
 
				         self.sample_num = sample_num
			
 
				 
			
 
				     def check_or_download_dataset(self):
			
 
				+        return
			
 
				+
			
 
				+    def get_anno(self):
			
 
				+        if self.anno_path is None:
			
 
				+            return
			
 
				         if self.dataset_dir:
			
 
				-            # NOTE: ImageFolder is only used for prediction, in
			
 
				-            #       infer mode, image_dir is set by set_images
			
 
				-            #       so we only check anno_path here
			
 
				-            self.dataset_dir = get_dataset_path(self.dataset_dir,
			
 
				-                                                self.anno_path, None)
			
 
				+            return os.path.join(self.dataset_dir, self.anno_path)
			
 
				+        else:
			
 
				+            return self.anno_path
			
 
				 
			
 
				     def parse_dataset(self, ):
			
 
				         if not self.roidbs:
			
@@ -195,3 +207,93 @@ class ImageFolder(DetDataset):
 
				     def set_images(self, images):
			
 
				         self.image_dir = images
			
 
				         self.roidbs = self._load_images()
			
 
				+
			
 
    def set_slice_images(self,
                         images,
                         slice_size=(640, 640),
                         overlap_ratio=(0.25, 0.25)):
        """Load ``images`` and replace ``self.roidbs`` with per-slice records.

        Every record returned by ``self._load_images()`` is cut into
        overlapping tiles with ``sahi.slicing.slice_image`` and one record is
        emitted per tile.

        Args:
            images: value stored as ``self.image_dir`` before
                ``self._load_images()`` is called.
            slice_size (sequence): ``(height, width)`` of each slice.
            overlap_ratio (sequence): ``(height_ratio, width_ratio)`` overlap
                between neighbouring slices.

        Raises:
            Exception: whatever the ``sahi`` import raised, re-raised after an
                installation hint has been logged.
        """
        self.image_dir = images
        ori_records = self._load_images()
        try:
            from sahi.slicing import slice_image
        except Exception:
            logger.error(
                'sahi not found, please install sahi. '
                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
            )
            raise

        with_image = 'image' in self.data_fields
        # Running offset so that slice ids stay unique across source images.
        # NOTE(review): the offset used to stay at 0, which restarted the
        # slice `im_id` at 0 for every source image - confirm no caller relied
        # on that numbering.
        sub_img_ids = 0
        ct = 0
        records = []
        for ori_rec in ori_records:
            slice_image_result = slice_image(
                image=ori_rec['im_file'],
                slice_height=slice_size[0],
                slice_width=slice_size[1],
                overlap_height_ratio=overlap_ratio[0],
                overlap_width_ratio=overlap_ratio[1])

            sub_img_num = len(slice_image_result)
            for _ind in range(sub_img_num):
                if not with_image:
                    # Without the 'image' field an empty record is emitted.
                    records.append({})
                    continue
                im = slice_image_result.images[_ind]
                records.append({
                    'image': im,
                    'im_id': np.array([sub_img_ids + _ind]),
                    'h': im.shape[0],
                    'w': im.shape[1],
                    'ori_im_id': np.array([ori_rec['im_id'][0]]),
                    'st_pix': np.array(
                        slice_image_result.starting_pixels[_ind],
                        dtype=np.float32),
                    # Flags the final slice of the current source image.
                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
                })
            sub_img_ids += sub_img_num
            ct += 1
        print('{} samples and slice to {} sub_samples'.format(ct, sub_img_ids))
        self.roidbs = records
			
 
				+
			
 
    def get_label_list(self):
        """Return ``self.anno_path``, which serves as the label list here.

        Only the VOC dataset needs a label list in ImageFolder.
        """
        label_list = self.anno_path
        return label_list
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class CommonDataset(object):
			
 
				+    def __init__(self, **dataset_args):
			
 
				+        super(CommonDataset, self).__init__()
			
 
				+        dataset_args = copy.deepcopy(dataset_args)
			
 
				+        type = dataset_args.pop("name")
			
 
				+        self.dataset = getattr(source, type)(**dataset_args)
			
 
				+
			
 
				+    def __call__(self):
			
 
				+        return self.dataset
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TrainDataset(CommonDataset):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class EvalMOTDataset(CommonDataset):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TestMOTDataset(CommonDataset):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class EvalDataset(CommonDataset):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TestDataset(CommonDataset):
			
 
				+    pass
			
--- a/paddlers/models/ppdet/data/source/keypoint_coco.py
+++ b/paddlers/models/ppdet/data/source/keypoint_coco.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 """
			
 
				 this code is base on https://github.com/open-mmlab/mmpose
			
@@ -27,7 +27,7 @@ from paddlers.models.ppdet.core.workspace import register, serializable
 
				 
			
 
				 @serializable
			
 
				 class KeypointBottomUpBaseDataset(DetDataset):
			
 
				-    """Base class for bottom-up datasets.
			
 
				+    """Base class for bottom-up datasets. 
			
 
				 
			
 
				     All datasets should subclass it.
			
 
				     All subclasses should overwrite:
			
@@ -91,7 +91,7 @@ class KeypointBottomUpBaseDataset(DetDataset):
 
				 @register
			
 
				 @serializable
			
 
				 class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
			
 
				-    """COCO dataset for bottom-up pose estimation.
			
 
				+    """COCO dataset for bottom-up pose estimation. 
			
 
				 
			
 
				     The dataset loads raw features and apply specified transforms
			
 
				     to return a dict containing the image tensors and other information.
			
@@ -262,7 +262,7 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
 
				 @register
			
 
				 @serializable
			
 
				 class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
			
 
				-    """CrowdPose dataset for bottom-up pose estimation.
			
 
				+    """CrowdPose dataset for bottom-up pose estimation. 
			
 
				 
			
 
				     The dataset loads raw features and apply specified transforms
			
 
				     to return a dict containing the image tensors and other information.
			
@@ -386,7 +386,7 @@ class KeypointTopDownBaseDataset(DetDataset):
 
				 @register
			
 
				 @serializable
			
 
				 class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
			
 
				-    """COCO dataset for top-down pose estimation.
			
 
				+    """COCO dataset for top-down pose estimation. 
			
 
				 
			
 
				     The dataset loads raw features and apply specified transforms
			
 
				     to return a dict containing the image tensors and other information.
			
--- a/paddlers/models/ppdet/data/source/mot.py
+++ b/paddlers/models/ppdet/data/source/mot.py
@@ -39,15 +39,16 @@ class MOTDataSet(DetDataset):
 
				         image_lists (str|list): mot data image lists, muiti-source mot dataset.
			
 
				         data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				         sample_num (int): number of samples to load, -1 means all.
			
 
				+        repeat (int): repeat times for dataset, use in benchmark.
			
 
				 
			
 
				     Notes:
			
 
				         MOT datasets root directory following this:
			
 
				             dataset/mot
			
 
				             |——————image_lists
			
 
				-            |        |——————caltech.train
			
 
				-            |        |——————caltech.val
			
 
				-            |        |——————mot16.train
			
 
				-            |        |——————mot17.train
			
 
				+            |        |——————caltech.train  
			
 
				+            |        |——————caltech.val   
			
 
				+            |        |——————mot16.train  
			
 
				+            |        |——————mot17.train  
			
 
				             |        ......
			
 
				             |——————Caltech
			
 
				             |——————MOT17
			
@@ -77,11 +78,13 @@ class MOTDataSet(DetDataset):
 
				                  dataset_dir=None,
			
 
				                  image_lists=[],
			
 
				                  data_fields=['image'],
			
 
				-                 sample_num=-1):
			
 
				+                 sample_num=-1,
			
 
				+                 repeat=1):
			
 
				         super(MOTDataSet, self).__init__(
			
 
				             dataset_dir=dataset_dir,
			
 
				             data_fields=data_fields,
			
 
				-            sample_num=sample_num)
			
 
				+            sample_num=sample_num,
			
 
				+            repeat=repeat)
			
 
				         self.dataset_dir = dataset_dir
			
 
				         self.image_lists = image_lists
			
 
				         if isinstance(self.image_lists, str):
			
@@ -243,8 +246,8 @@ class MCMOTDataSet(DetDataset):
 
				         MCMOT datasets root directory following this:
			
 
				             dataset/mot
			
 
				             |——————image_lists
			
 
				-            |        |——————visdrone_mcmot.train
			
 
				-            |        |——————visdrone_mcmot.val
			
 
				+            |        |——————visdrone_mcmot.train  
			
 
				+            |        |——————visdrone_mcmot.val   
			
 
				             visdrone_mcmot
			
 
				             |——————images
			
 
				             |        └——————train
			
@@ -348,10 +351,10 @@ class MCMOTDataSet(DetDataset):
 
				         self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
			
 
				         self.total_imgs = sum(self.num_imgs_each_data)
			
 
				 
			
 
				-        # cname2cid and cid2cname
			
 
				+        # cname2cid and cid2cname 
			
 
				         cname2cid = {}
			
 
				         if self.label_list is not None:
			
 
				-            # if use label_list for multi source mix dataset,
			
 
				+            # if use label_list for multi source mix dataset, 
			
 
				             # please make sure label_list in the first sub_dataset at least.
			
 
				             sub_dataset = self.image_lists[0].split('.')[0]
			
 
				             label_path = os.path.join(self.dataset_dir, sub_dataset,
			
@@ -461,7 +464,7 @@ class MOTImageFolder(DetDataset):
 
				         video_file (str): path of the video file, default ''.
			
 
				         frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
			
 
				         dataset_dir (str): root directory for dataset.
			
 
				-        keep_ori_im (bool): whether to keep original image, default False.
			
 
				+        keep_ori_im (bool): whether to keep original image, default False. 
			
 
				             Set True when used during MOT model inference while saving
			
 
				             images or video, or used in DeepSORT.
			
 
				     """
			
@@ -474,6 +477,7 @@ class MOTImageFolder(DetDataset):
 
				                  image_dir=None,
			
 
				                  sample_num=-1,
			
 
				                  keep_ori_im=False,
			
 
				+                 anno_path=None,
			
 
				                  **kwargs):
			
 
				         super(MOTImageFolder, self).__init__(
			
 
				             dataset_dir, image_dir, sample_num=sample_num)
			
@@ -483,6 +487,7 @@ class MOTImageFolder(DetDataset):
 
				         self._imid2path = {}
			
 
				         self.roidbs = None
			
 
				         self.frame_rate = frame_rate
			
 
				+        self.anno_path = anno_path
			
 
				 
			
 
				     def check_or_download_dataset(self):
			
 
				         return
			
@@ -573,6 +578,9 @@ class MOTImageFolder(DetDataset):
 
				                 "wrong or unsupported file format: {}".format(self.video_file)
			
 
				         self.roidbs = self._load_video_images()
			
 
				 
			
 
    def get_anno(self):
        """Return the annotation path stored in ``self.anno_path``."""
        anno = self.anno_path
        return anno
			
 
				+
			
 
				 
			
 
				 def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
			
 
				     return f.lower().endswith(extensions)
			
--- a/paddlers/models/ppdet/data/source/voc.py
+++ b/paddlers/models/ppdet/data/source/voc.py
@@ -43,9 +43,10 @@ class VOCDataSet(DetDataset):
 
				         label_list (str): if use_default_label is False, will load
			
 
				             mapping between category and class index.
			
 
				         allow_empty (bool): whether to load empty entry. False as default
			
 
				-        empty_ratio (float): the ratio of empty record number to total
			
 
				-            record's, if empty_ratio is out of [0. ,1.), do not sample the
			
 
				+        empty_ratio (float): the ratio of empty record number to total 
			
 
				+            record's, if empty_ratio is out of [0. ,1.), do not sample the 
			
 
				             records and use all the empty entries. 1. as default
			
 
				+        repeat (int): repeat times for dataset, use in benchmark.
			
 
				     """
			
 
				 
			
 
				     def __init__(self,
			
@@ -56,13 +57,15 @@ class VOCDataSet(DetDataset):
 
				                  sample_num=-1,
			
 
				                  label_list=None,
			
 
				                  allow_empty=False,
			
 
				-                 empty_ratio=1.):
			
 
				+                 empty_ratio=1.,
			
 
				+                 repeat=1):
			
 
				         super(VOCDataSet, self).__init__(
			
 
				             dataset_dir=dataset_dir,
			
 
				             image_dir=image_dir,
			
 
				             anno_path=anno_path,
			
 
				             data_fields=data_fields,
			
 
				-            sample_num=sample_num)
			
 
				+            sample_num=sample_num,
			
 
				+            repeat=repeat)
			
 
				         self.label_list = label_list
			
 
				         self.allow_empty = allow_empty
			
 
				         self.empty_ratio = empty_ratio
			
--- a/paddlers/models/ppdet/data/transform/__init__.py
+++ b/paddlers/models/ppdet/data/transform/__init__.py
@@ -16,11 +16,13 @@ from . import operators
 
				 from . import batch_operators
			
 
				 from . import keypoint_operators
			
 
				 from . import mot_operators
			
 
				+from . import rotated_operators
			
 
				 
			
 
				 from .operators import *
			
 
				 from .batch_operators import *
			
 
				 from .keypoint_operators import *
			
 
				 from .mot_operators import *
			
 
				+from .rotated_operators import *
			
 
				 
			
 
				 __all__ = []
			
 
				 __all__ += registered_ops
			
--- a/paddlers/models/ppdet/data/transform/autoaugment_utils.py
+++ b/paddlers/models/ppdet/data/transform/autoaugment_utils.py
@@ -11,7 +11,7 @@
 
				 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				 # See the License for the specific language governing permissions and
			
 
				 # limitations under the License.
			
 
				-# Reference:
			
 
				+# Reference: 
			
 
				 #   https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
			
 
				 """AutoAugment util file."""
			
 
				 
			
@@ -65,7 +65,7 @@ def policy_v1():
 
				         [('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
			
 
				         [('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
			
 
				         [('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
			
 
				-        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # ,
			
 
				+        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # , 
			
 
				         [('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
			
 
				         [('Color', 1.0, 6), ('Equalize', 1.0, 2)],
			
 
				         [('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],
			
--- a/paddlers/models/ppdet/data/transform/batch_operators.py
+++ b/paddlers/models/ppdet/data/transform/batch_operators.py
@@ -47,6 +47,8 @@ __all__ = [
 
				     'PadMaskBatch',
			
 
				     'Gt2GFLTarget',
			
 
				     'Gt2CenterNetTarget',
			
 
				+    'PadGT',
			
 
				+    'PadRGT',
			
 
				 ]
			
 
				 
			
 
				 
			
@@ -108,12 +110,6 @@ class PadBatch(BaseOperator):
 
				                 padding_segm[:, :im_h, :im_w] = gt_segm
			
 
				                 data['gt_segm'] = padding_segm
			
 
				 
			
 
				-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
			
 
				-                # ploy to rbox
			
 
				-                polys = data['gt_rbox2poly']
			
 
				-                rbox = bbox_utils.poly2rbox(polys)
			
 
				-                data['gt_rbox'] = rbox
			
 
				-
			
 
				         return samples
			
 
				 
			
 
				 
			
@@ -233,7 +229,7 @@ class Gt2YoloTarget(BaseOperator):
 
				                     gi = int(gx * grid_w)
			
 
				                     gj = int(gy * grid_h)
			
 
				 
			
 
				-                    # gtbox should be regresed in this layes if best match
			
 
				+                    # gtbox should be regresed in this layes if best match 
			
 
				                     # anchor index in anchor mask of this layer
			
 
				                     if best_idx in mask:
			
 
				                         best_n = mask.index(best_idx)
			
@@ -253,7 +249,7 @@ class Gt2YoloTarget(BaseOperator):
 
				                         # classification
			
 
				                         target[best_n, 6 + cls, gj, gi] = 1.
			
 
				 
			
 
				-                    # For non-matched anchors, calculate the target if the iou
			
 
				+                    # For non-matched anchors, calculate the target if the iou 
			
 
				                     # between anchor and gt is larger than iou_thresh
			
 
				                     if self.iou_thresh < 1:
			
 
				                         for idx, mask_i in enumerate(mask):
			
@@ -618,7 +614,7 @@ class Gt2TTFTarget(BaseOperator):
 
				     """
			
 
				     Gt2TTFTarget
			
 
				     Generate TTFNet targets by ground truth data
			
 
				-
			
 
				+    
			
 
				     Args:
			
 
				         num_classes(int): the number of classes.
			
 
				         down_ratio(int): the down ratio from images to heatmap, 4 by default.
			
@@ -980,12 +976,6 @@ class PadMaskBatch(BaseOperator):
 
				                 padding_mask[:im_h, :im_w] = 1.
			
 
				                 data['pad_mask'] = padding_mask
			
 
				 
			
 
				-            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
			
 
				-                # ploy to rbox
			
 
				-                polys = data['gt_rbox2poly']
			
 
				-                rbox = bbox_utils.poly2rbox(polys)
			
 
				-                data['gt_rbox'] = rbox
			
 
				-
			
 
				         return samples
			
 
				 
			
 
				 
			
@@ -994,7 +984,7 @@ class Gt2CenterNetTarget(BaseOperator):
 
				     """Gt2CenterNetTarget
			
 
				     Genterate CenterNet targets by ground-truth
			
 
				     Args:
			
 
				-        down_ratio (int): The down sample ratio between output feature and
			
 
				+        down_ratio (int): The down sample ratio between output feature and 
			
 
				                           input image.
			
 
				         num_classes (int): The number of classes, 80 by default.
			
 
				         max_objs (int): The maximum objects detected, 128 by default.
			
@@ -1068,3 +1058,110 @@ class Gt2CenterNetTarget(BaseOperator):
 
				         sample['size'] = wh
			
 
				         sample['offset'] = reg
			
 
				         return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadGT(BaseOperator):
			
 
				+    """
			
 
				+    Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
			
 
				+    The num_max_boxes is the largest for batch.
			
 
				+    Args:
			
 
				+        return_gt_mask (bool): If true, return `pad_gt_mask`,
			
 
				+                                1 means bbox, 0 means no bbox.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, return_gt_mask=True):
			
 
				+        super(PadGT, self).__init__()
			
 
				+        self.return_gt_mask = return_gt_mask
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        num_max_boxes = max([len(s['gt_bbox']) for s in samples])
			
 
				+        for sample in samples:
			
 
				+            if self.return_gt_mask:
			
 
				+                sample['pad_gt_mask'] = np.zeros(
			
 
				+                    (num_max_boxes, 1), dtype=np.float32)
			
 
				+            if num_max_boxes == 0:
			
 
				+                continue
			
 
				+
			
 
				+            num_gt = len(sample['gt_bbox'])
			
 
				+            pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
			
 
				+            pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
			
 
				+            if num_gt > 0:
			
 
				+                pad_gt_class[:num_gt] = sample['gt_class']
			
 
				+                pad_gt_bbox[:num_gt] = sample['gt_bbox']
			
 
				+            sample['gt_class'] = pad_gt_class
			
 
				+            sample['gt_bbox'] = pad_gt_bbox
			
 
				+            # pad_gt_mask
			
 
				+            if 'pad_gt_mask' in sample:
			
 
				+                sample['pad_gt_mask'][:num_gt] = 1
			
 
				+            # gt_score
			
 
				+            if 'gt_score' in sample:
			
 
				+                pad_gt_score = np.zeros((num_max_boxes, 1), dtype=np.float32)
			
 
				+                if num_gt > 0:
			
 
				+                    pad_gt_score[:num_gt] = sample['gt_score']
			
 
				+                sample['gt_score'] = pad_gt_score
			
 
				+            if 'is_crowd' in sample:
			
 
				+                pad_is_crowd = np.zeros((num_max_boxes, 1), dtype=np.int32)
			
 
				+                if num_gt > 0:
			
 
				+                    pad_is_crowd[:num_gt] = sample['is_crowd']
			
 
				+                sample['is_crowd'] = pad_is_crowd
			
 
				+            if 'difficult' in sample:
			
 
				+                pad_diff = np.zeros((num_max_boxes, 1), dtype=np.int32)
			
 
				+                if num_gt > 0:
			
 
				+                    pad_diff[:num_gt] = sample['difficult']
			
 
				+                sample['difficult'] = pad_diff
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadRGT(BaseOperator):
			
 
				+    """
			
 
				+    Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
			
 
				+    The num_max_boxes is the largest for batch.
			
 
				+    Args:
			
 
				+        return_gt_mask (bool): If true, return `pad_gt_mask`,
			
 
				+                                1 means bbox, 0 means no bbox.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, return_gt_mask=True):
			
 
				+        super(PadRGT, self).__init__()
			
 
				+        self.return_gt_mask = return_gt_mask
			
 
				+
			
 
				+    def pad_field(self, sample, field, num_gt):
			
 
				+        name, shape, dtype = field
			
 
				+        if name in sample:
			
 
				+            pad_v = np.zeros(shape, dtype=dtype)
			
 
				+            if num_gt > 0:
			
 
				+                pad_v[:num_gt] = sample[name]
			
 
				+            sample[name] = pad_v
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        num_max_boxes = max([len(s['gt_bbox']) for s in samples])
			
 
				+        for sample in samples:
			
 
				+            if self.return_gt_mask:
			
 
				+                sample['pad_gt_mask'] = np.zeros(
			
 
				+                    (num_max_boxes, 1), dtype=np.float32)
			
 
				+            if num_max_boxes == 0:
			
 
				+                continue
			
 
				+
			
 
				+            num_gt = len(sample['gt_bbox'])
			
 
				+            pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
			
 
				+            pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
			
 
				+            if num_gt > 0:
			
 
				+                pad_gt_class[:num_gt] = sample['gt_class']
			
 
				+                pad_gt_bbox[:num_gt] = sample['gt_bbox']
			
 
				+            sample['gt_class'] = pad_gt_class
			
 
				+            sample['gt_bbox'] = pad_gt_bbox
			
 
				+            # pad_gt_mask
			
 
				+            if 'pad_gt_mask' in sample:
			
 
				+                sample['pad_gt_mask'][:num_gt] = 1
			
 
				+            # gt_score
			
 
				+            names = ['gt_score', 'is_crowd', 'difficult', 'gt_poly', 'gt_rbox']
			
 
				+            dims = [1, 1, 1, 8, 5]
			
 
				+            dtypes = [np.float32, np.int32, np.int32, np.float32, np.float32]
			
 
				+
			
 
				+            for name, dim, dtype in zip(names, dims, dtypes):
			
 
				+                self.pad_field(sample, [name, (num_max_boxes, dim), dtype],
			
 
				+                               num_gt)
			
 
				+
			
 
				+        return samples
			
--- a/paddlers/models/ppdet/data/transform/keypoint_operators.py
+++ b/paddlers/models/ppdet/data/transform/keypoint_operators.py
@@ -511,18 +511,18 @@ class RandomFlipHalfBodyTransform(object):
 
				 
			
 
				 @register_keypointop
			
 
				 class AugmentationbyInformantionDropping(object):
			
 
				-    """AID: Augmentation by Informantion Dropping. Please refer
			
 
				-        to https://arxiv.org/abs/2008.07139
			
 
				-
			
 
				+    """AID: Augmentation by Informantion Dropping. Please refer 
			
 
				+        to https://arxiv.org/abs/2008.07139 
			
 
				+    
			
 
				     Args:
			
 
				         prob_cutout (float): The probability of the Cutout augmentation.
			
 
				         offset_factor (float): Offset factor of cutout center.
			
 
				-        num_patch (int): Number of patches to be cutout.
			
 
				+        num_patch (int): Number of patches to be cutout.                       
			
 
				         records(dict): the dict contained the image and coords
			
 
				-
			
 
				+        
			
 
				     Returns:
			
 
				         records (dict): contain the image and coords after tranformed
			
 
				-
			
 
				+    
			
 
				     """
			
 
				 
			
 
				     def __init__(self,
			
@@ -698,8 +698,8 @@ class ToHeatmapsTopDown(object):
 
				         tmp_size = self.sigma * 3
			
 
				         feat_stride = image_size / self.hmsize
			
 
				         for joint_id in range(num_joints):
			
 
				-            mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
			
 
				-            mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
			
 
				+            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
			
 
				+            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
			
 
				             # Check that any part of the gaussian is in-bounds
			
 
				             ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
			
 
				             br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
			
@@ -796,7 +796,7 @@ class ToHeatmapsTopDown_DARK(object):
 
				 class ToHeatmapsTopDown_UDP(object):
			
 
				     """This code is based on:
			
 
				         https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
			
 
				-
			
 
				+       
			
 
				         to generate the gaussian heatmaps of keypoint for heatmap loss.
			
 
				         ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
			
 
				         for Human Pose Estimation (CVPR 2020).
			
--- a/paddlers/models/ppdet/data/transform/mot_operators.py
+++ b/paddlers/models/ppdet/data/transform/mot_operators.py
@@ -132,7 +132,7 @@ class LetterBoxResize(BaseOperator):
 
				 
			
 
				 @register_op
			
 
				 class MOTRandomAffine(BaseOperator):
			
 
				-    """
			
 
				+    """ 
			
 
				     Affine transform to image and coords to achieve the rotate, scale and
			
 
				     shift effect for training image.
			
 
				 
			
@@ -271,7 +271,7 @@ class Gt2JDETargetThres(BaseOperator):
 
				         anchors (list): anchors of JDE model
			
 
				         anchor_masks (list): anchor_masks of JDE model
			
 
				         downsample_ratios (list): downsample ratios of JDE model
			
 
				-        ide_thresh (float): thresh of identity, higher is groud truth
			
 
				+        ide_thresh (float): thresh of identity, higher is groud truth 
			
 
				         fg_thresh (float): thresh of foreground, higher is foreground
			
 
				         bg_thresh (float): thresh of background, lower is background
			
 
				         num_classes (int): number of classes
			
@@ -529,8 +529,8 @@ class Gt2FairMOTTarget(Gt2TTFTarget):
 
				     Generate FairMOT targets by ground truth data.
			
 
				     Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
			
 
				         1. the gaussian kernal radius to generate a heatmap.
			
 
				-        2. the targets needed during traing.
			
 
				-
			
 
				+        2. the targets needed during training.
			
 
				+    
			
 
				     Args:
			
 
				         num_classes(int): the number of classes.
			
 
				         down_ratio(int): the down ratio from images to heatmap, 4 by default.
			
--- a/paddlers/models/ppdet/data/transform/operators.py
+++ b/paddlers/models/ppdet/data/transform/operators.py
@@ -41,7 +41,6 @@ import threading
 
				 MUTEX = threading.Lock()
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import serializable
			
 
				-from paddlers.models.ppdet.modeling import bbox_utils
			
 
				 from ..reader import Compose
			
 
				 
			
 
				 from .op_helper import (satisfy_sample_constraint, filter_and_process,
			
@@ -123,12 +122,15 @@ class Decode(BaseOperator):
 
				                 sample['image'] = f.read()
			
 
				             sample.pop('im_file')
			
 
				 
			
 
				-        im = sample['image']
			
 
				-        data = np.frombuffer(im, dtype='uint8')
			
 
				-        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
			
 
				-        if 'keep_ori_im' in sample and sample['keep_ori_im']:
			
 
				-            sample['ori_image'] = im
			
 
				-        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			
 
				+        try:
			
 
				+            im = sample['image']
			
 
				+            data = np.frombuffer(im, dtype='uint8')
			
 
				+            im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
			
 
				+            if 'keep_ori_im' in sample and sample['keep_ori_im']:
			
 
				+                sample['ori_image'] = im
			
 
				+            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			
 
				+        except:
			
 
				+            im = sample['image']
			
 
				 
			
 
				         sample['image'] = im
			
 
				         if 'h' not in sample:
			
@@ -357,19 +359,26 @@ class RandomErasingImage(BaseOperator):
 
				 
			
 
				 @register_op
			
 
				 class NormalizeImage(BaseOperator):
			
 
				-    def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
			
 
				-                 is_scale=True):
			
 
				+    def __init__(self,
			
 
				+                 mean=[0.485, 0.456, 0.406],
			
 
				+                 std=[0.229, 0.224, 0.225],
			
 
				+                 is_scale=True,
			
 
				+                 norm_type='mean_std'):
			
 
				         """
			
 
				         Args:
			
 
				             mean (list): the pixel mean
			
 
				             std (list): the pixel variance
			
 
				+            is_scale (bool): scale the pixel to [0,1]
			
 
				+            norm_type (str): type in ['mean_std', 'none']
			
 
				         """
			
 
				         super(NormalizeImage, self).__init__()
			
 
				         self.mean = mean
			
 
				         self.std = std
			
 
				         self.is_scale = is_scale
			
 
				+        self.norm_type = norm_type
			
 
				         if not (isinstance(self.mean, list) and isinstance(self.std, list) and
			
 
				-                isinstance(self.is_scale, bool)):
			
 
				+                isinstance(self.is_scale, bool) and
			
 
				+                self.norm_type in ['mean_std', 'none']):
			
 
				             raise TypeError("{}: input type is invalid.".format(self))
			
 
				         from functools import reduce
			
 
				         if reduce(lambda x, y: x * y, self.std) == 0:
			
@@ -378,20 +387,20 @@ class NormalizeImage(BaseOperator):
 
				     def apply(self, sample, context=None):
			
 
				         """Normalize the image.
			
 
				         Operators:
			
 
				-            1.(optional) Scale the image to [0,1]
			
 
				-            2. Each pixel minus mean and is divided by std
			
 
				+            1.(optional) Scale the pixel to [0,1]
			
 
				+            2.(optional) Each pixel minus mean and is divided by std
			
 
				         """
			
 
				         im = sample['image']
			
 
				         im = im.astype(np.float32, copy=False)
			
 
				-        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
			
 
				-        std = np.array(self.std)[np.newaxis, np.newaxis, :]
			
 
				-
			
 
				         if self.is_scale:
			
 
				-            im = im / 255.0
			
 
				-
			
 
				-        im -= mean
			
 
				-        im /= std
			
 
				-
			
 
				+            scale = 1.0 / 255.0
			
 
				+            im *= scale
			
 
				+
			
 
				+        if self.norm_type == 'mean_std':
			
 
				+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
			
 
				+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
			
 
				+            im -= mean
			
 
				+            im /= std
			
 
				         sample['image'] = im
			
 
				         return sample
			
 
				 
			
@@ -448,6 +457,10 @@ class GridMask(BaseOperator):
 
				 @register_op
			
 
				 class RandomDistort(BaseOperator):
			
 
				     """Random color distortion.
			
 
				+    Note:
			
 
				+        The 'probability' in [lower, upper, probability] is the probability of not using this transformation,
			
 
				+        not the probability of using this transformation. And this only applies in this operator(RandomDistort),
			
 
				+        'probability' in other BaseOperator means the probability of using that transformation.
			
 
				     Args:
			
 
				         hue (list): hue settings. in [lower, upper, probability] format.
			
 
				         saturation (list): saturation settings. in [lower, upper, probability] format.
			
@@ -657,18 +670,6 @@ class RandomFlip(BaseOperator):
 
				         bbox[:, 2] = width - oldx1
			
 
				         return bbox
			
 
				 
			
 
				-    def apply_rbox(self, bbox, width):
			
 
				-        oldx1 = bbox[:, 0].copy()
			
 
				-        oldx2 = bbox[:, 2].copy()
			
 
				-        oldx3 = bbox[:, 4].copy()
			
 
				-        oldx4 = bbox[:, 6].copy()
			
 
				-        bbox[:, 0] = width - oldx1
			
 
				-        bbox[:, 2] = width - oldx2
			
 
				-        bbox[:, 4] = width - oldx3
			
 
				-        bbox[:, 6] = width - oldx4
			
 
				-        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
			
 
				-        return bbox
			
 
				-
			
 
				     def apply(self, sample, context=None):
			
 
				         """Filp the image and bounding box.
			
 
				         Operators:
			
@@ -700,10 +701,6 @@ class RandomFlip(BaseOperator):
 
				             if 'gt_segm' in sample and sample['gt_segm'].any():
			
 
				                 sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
			
 
				 
			
 
				-            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
			
 
				-                sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
			
 
				-                                                         width)
			
 
				-
			
 
				             sample['flipped'] = True
			
 
				             sample['image'] = im
			
 
				         return sample
			
@@ -713,7 +710,7 @@ class RandomFlip(BaseOperator):
 
				 class Resize(BaseOperator):
			
 
				     def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
			
 
				         """
			
 
				-        Resize image to target size. if keep_ratio is True,
			
 
				+        Resize image to target size. if keep_ratio is True, 
			
 
				         resize the image's long side to the maximum of target_size
			
 
				         if keep_ratio is False, resize the image to target size(h, w)
			
 
				         Args:
			
@@ -824,7 +821,7 @@ class Resize(BaseOperator):
 
				             im_scale_x = resize_w / im_shape[1]
			
 
				 
			
 
				         im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
			
 
				-        sample['image'] = im
			
 
				+        sample['image'] = im.astype(np.float32)
			
 
				         sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
			
 
				         if 'scale_factor' in sample:
			
 
				             scale_factor = sample['scale_factor']
			
@@ -841,16 +838,6 @@ class Resize(BaseOperator):
 
				                                                 [im_scale_x, im_scale_y],
			
 
				                                                 [resize_w, resize_h])
			
 
				 
			
 
				-        # apply rbox
			
 
				-        if 'gt_rbox2poly' in sample:
			
 
				-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
			
 
				-                logger.warning(
			
 
				-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
			
 
				-                    format(len(sample['gt_rbox2poly'])))
			
 
				-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
			
 
				-                                                     [im_scale_x, im_scale_y],
			
 
				-                                                     [resize_w, resize_h])
			
 
				-
			
 
				         # apply polygon
			
 
				         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
			
@@ -1054,7 +1041,7 @@ class CropWithSampling(BaseOperator):
 
				            [max sample, max trial, min scale, max scale,
			
 
				             min aspect ratio, max aspect ratio,
			
 
				             min overlap, max overlap]
			
 
				-            avoid_no_bbox (bool): whether to to avoid the
			
 
				+            avoid_no_bbox (bool): whether to avoid the
			
 
				                                   situation where the box does not appear.
			
 
				         """
			
 
				         super(CropWithSampling, self).__init__()
			
@@ -1145,7 +1132,7 @@ class CropWithDataAchorSampling(BaseOperator):
 
				             das_anchor_scales (list[float]): a list of anchor scales in data
			
 
				                 anchor smapling.
			
 
				             min_size (float): minimum size of sampled bbox.
			
 
				-            avoid_no_bbox (bool): whether to to avoid the
			
 
				+            avoid_no_bbox (bool): whether to avoid the
			
 
				                                   situation where the box does not appear.
			
 
				         """
			
 
				         super(CropWithDataAchorSampling, self).__init__()
			
@@ -1504,6 +1491,11 @@ class RandomCrop(BaseOperator):
 
				                 if 'is_crowd' in sample:
			
 
				                     sample['is_crowd'] = np.take(
			
 
				                         sample['is_crowd'], valid_ids, axis=0)
			
 
				+
			
 
				+                if 'difficult' in sample:
			
 
				+                    sample['difficult'] = np.take(
			
 
				+                        sample['difficult'], valid_ids, axis=0)
			
 
				+
			
 
				                 return sample
			
 
				 
			
 
				         return sample
			
@@ -1604,7 +1596,7 @@ class RandomScaledCrop(BaseOperator):
 
				 @register_op
			
 
				 class Cutmix(BaseOperator):
			
 
				     def __init__(self, alpha=1.5, beta=1.5):
			
 
				-        """
			
 
				+        """ 
			
 
				         CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
			
 
				         Cutmix image and gt_bbbox/gt_score
			
 
				         Args:
			
@@ -1747,7 +1739,7 @@ class Mixup(BaseOperator):
 
				             gt_score2 = np.ones_like(sample[1]['gt_class'])
			
 
				             gt_score = np.concatenate(
			
 
				                 (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
			
 
				-            result['gt_score'] = gt_score
			
 
				+            result['gt_score'] = gt_score.astype('float32')
			
 
				         if 'is_crowd' in sample[0]:
			
 
				             is_crowd1 = sample[0]['is_crowd']
			
 
				             is_crowd2 = sample[1]['is_crowd']
			
@@ -2029,13 +2021,14 @@ class Pad(BaseOperator):
 
				         if self.size:
			
 
				             h, w = self.size
			
 
				             assert (
			
 
				-                im_h < h and im_w < w
			
 
				+                im_h <= h and im_w <= w
			
 
				             ), '(h, w) of target size should be greater than (im_h, im_w)'
			
 
				         else:
			
 
				-            h = np.ceil(im_h / self.size_divisor) * self.size_divisor
			
 
				-            w = np.ceil(im_w / self.size_divisor) * self.size_divisor
			
 
				+            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
			
 
				+            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
			
 
				 
			
 
				         if h == im_h and w == im_w:
			
 
				+            sample['image'] = im.astype(np.float32)
			
 
				             return sample
			
 
				 
			
 
				         if self.pad_mode == -1:
			
@@ -2106,44 +2099,30 @@ class Poly2Mask(BaseOperator):
 
				 
			
 
				 
			
 
				 @register_op
			
 
				-class Rbox2Poly(BaseOperator):
			
 
				-    """
			
 
				-    Convert rbbox format to poly format.
			
 
				+class AugmentHSV(BaseOperator):
			
 
				+    """ 
			
 
				+    Augment the SV channel of image data.
			
 
				+    Args:
			
 
				+        fraction (float): the fraction for augment. Default: 0.5.
			
 
				+        is_bgr (bool): whether the image is BGR mode. Default: True.
			
 
				+        hgain (float): H channel gains
			
 
				+        sgain (float): S channel gains
			
 
				+        vgain (float): V channel gains
			
 
				     """
			
 
				 
			
 
				-    def __init__(self):
			
 
				-        super(Rbox2Poly, self).__init__()
			
 
				-
			
 
				-    def apply(self, sample, context=None):
			
 
				-        assert 'gt_rbox' in sample
			
 
				-        assert sample['gt_rbox'].shape[1] == 5
			
 
				-        rrects = sample['gt_rbox']
			
 
				-        x_ctr = rrects[:, 0]
			
 
				-        y_ctr = rrects[:, 1]
			
 
				-        width = rrects[:, 2]
			
 
				-        height = rrects[:, 3]
			
 
				-        x1 = x_ctr - width / 2.0
			
 
				-        y1 = y_ctr - height / 2.0
			
 
				-        x2 = x_ctr + width / 2.0
			
 
				-        y2 = y_ctr + height / 2.0
			
 
				-        sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
			
 
				-        polys = bbox_utils.rbox2poly_np(rrects)
			
 
				-        sample['gt_rbox2poly'] = polys
			
 
				-        return sample
			
 
				-
			
 
				-
			
 
				-@register_op
			
 
				-class AugmentHSV(BaseOperator):
			
 
				-    def __init__(self, fraction=0.50, is_bgr=True):
			
 
				-        """
			
 
				-        Augment the SV channel of image data.
			
 
				-        Args:
			
 
				-            fraction (float): the fraction for augment. Default: 0.5.
			
 
				-            is_bgr (bool): whether the image is BGR mode. Default: True.
			
 
				-        """
			
 
				+    def __init__(self,
			
 
				+                 fraction=0.50,
			
 
				+                 is_bgr=True,
			
 
				+                 hgain=None,
			
 
				+                 sgain=None,
			
 
				+                 vgain=None):
			
 
				         super(AugmentHSV, self).__init__()
			
 
				         self.fraction = fraction
			
 
				         self.is_bgr = is_bgr
			
 
				+        self.hgain = hgain
			
 
				+        self.sgain = sgain
			
 
				+        self.vgain = vgain
			
 
				+        self.use_hsvgain = False if hgain is None else True
			
 
				 
			
 
				     def apply(self, sample, context=None):
			
 
				         img = sample['image']
			
@@ -2151,27 +2130,39 @@ class AugmentHSV(BaseOperator):
 
				             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
			
 
				         else:
			
 
				             img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
			
 
				-        S = img_hsv[:, :, 1].astype(np.float32)
			
 
				-        V = img_hsv[:, :, 2].astype(np.float32)
			
 
				 
			
 
				-        a = (random.random() * 2 - 1) * self.fraction + 1
			
 
				-        S *= a
			
 
				-        if a > 1:
			
 
				-            np.clip(S, a_min=0, a_max=255, out=S)
			
 
				+        if self.use_hsvgain:
			
 
				+            hsv_augs = np.random.uniform(
			
 
				+                -1, 1, 3) * [self.hgain, self.sgain, self.vgain]
			
 
				+            # random selection of h, s, v
			
 
				+            hsv_augs *= np.random.randint(0, 2, 3)
			
 
				+            img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
			
 
				+            img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
			
 
				+            img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
			
 
				+
			
 
				+        else:
			
 
				+            S = img_hsv[:, :, 1].astype(np.float32)
			
 
				+            V = img_hsv[:, :, 2].astype(np.float32)
			
 
				+
			
 
				+            a = (random.random() * 2 - 1) * self.fraction + 1
			
 
				+            S *= a
			
 
				+            if a > 1:
			
 
				+                np.clip(S, a_min=0, a_max=255, out=S)
			
 
				+
			
 
				+            a = (random.random() * 2 - 1) * self.fraction + 1
			
 
				+            V *= a
			
 
				+            if a > 1:
			
 
				+                np.clip(V, a_min=0, a_max=255, out=V)
			
 
				 
			
 
				-        a = (random.random() * 2 - 1) * self.fraction + 1
			
 
				-        V *= a
			
 
				-        if a > 1:
			
 
				-            np.clip(V, a_min=0, a_max=255, out=V)
			
 
				+            img_hsv[:, :, 1] = S.astype(np.uint8)
			
 
				+            img_hsv[:, :, 2] = V.astype(np.uint8)
			
 
				 
			
 
				-        img_hsv[:, :, 1] = S.astype(np.uint8)
			
 
				-        img_hsv[:, :, 2] = V.astype(np.uint8)
			
 
				         if self.is_bgr:
			
 
				             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
			
 
				         else:
			
 
				             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
			
 
				 
			
 
				-        sample['image'] = img
			
 
				+        sample['image'] = img.astype(np.float32)
			
 
				         return sample
			
 
				 
			
 
				 
			
@@ -2223,7 +2214,7 @@ class RandomResizeCrop(BaseOperator):
 
				         'long', resize the image's long side to the maximum of target_size, if keep_ratio is
			
 
				         True and mode is 'short', resize the image's short side to the minimum of target_size.
			
 
				         cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
			
 
				-        mode (str): resize mode, `long` or `short`. Details see resizes.
			
 
				+        mode (str): resize mode, `long` or `short`. Details see resizes. 
			
 
				         prob (float): probability of this op.
			
 
				         keep_ratio (bool): whether keep_ratio or not, default true
			
 
				         interp (int): the interpolation method
			
@@ -2425,16 +2416,6 @@ class RandomResizeCrop(BaseOperator):
 
				                                                 [im_scale_x, im_scale_y],
			
 
				                                                 [resize_w, resize_h])
			
 
				 
			
 
				-        # apply rbox
			
 
				-        if 'gt_rbox2poly' in sample:
			
 
				-            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
			
 
				-                logger.warn(
			
 
				-                    "gt_rbox2poly's length shoule be 8, but actually is {}".
			
 
				-                    format(len(sample['gt_rbox2poly'])))
			
 
				-            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
			
 
				-                                                     [im_scale_x, im_scale_y],
			
 
				-                                                     [resize_w, resize_h])
			
 
				-
			
 
				         # apply polygon
			
 
				         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				             sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
			
@@ -2892,7 +2873,7 @@ class FlipWarpAffine(BaseOperator):
 
				         """FlipWarpAffine
			
 
				         1. Random Crop
			
 
				         2. Flip the image horizontal
			
 
				-        3. Warp affine the image
			
 
				+        3. Warp affine the image 
			
 
				         """
			
 
				         super(FlipWarpAffine, self).__init__()
			
 
				         self.keep_res = keep_res
			
@@ -3013,3 +2994,409 @@ class CenterRandColor(BaseOperator):
 
				             img = func(img, img_gray)
			
 
				         sample['image'] = img
			
 
				         return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Mosaic(BaseOperator):
			
 
				+    """ Mosaic operator for image and gt_bboxes
			
 
				+    The code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/mosaicdetection.py
			
 
				+
			
 
				+    1. get mosaic coords
			
 
				+    2. clip bbox and get mosaic_labels
			
 
				+    3. random_affine augment
			
 
				+    4. Mixup augment as copypaste (optinal), not used in tiny/nano
			
 
				+
			
 
				+    Args:
			
 
				+        prob (float): probability of using Mosaic, 1.0 as default
			
 
				+        input_dim (list[int]): input shape
			
 
				+        degrees (list[2]): the rotate range to apply, transform range is [min, max]
			
 
				+        translate (list[2]): the translate range to apply, transform range is [min, max]
			
 
				+        scale (list[2]): the scale range to apply, transform range is [min, max]
			
 
				+        shear (list[2]): the shear range to apply, transform range is [min, max]
			
 
				+        enable_mixup (bool): whether to enable Mixup or not
			
 
				+        mixup_prob (float): probability of using Mixup, 1.0 as default
			
 
				+        mixup_scale (list[int]): scale range of Mixup
			
 
				+        remove_outside_box (bool): whether remove outside boxes, False as
			
 
				+            default in COCO dataset, True in MOT dataset
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 prob=1.0,
			
 
				+                 input_dim=[640, 640],
			
 
				+                 degrees=[-10, 10],
			
 
				+                 translate=[-0.1, 0.1],
			
 
				+                 scale=[0.1, 2],
			
 
				+                 shear=[-2, 2],
			
 
				+                 enable_mixup=True,
			
 
				+                 mixup_prob=1.0,
			
 
				+                 mixup_scale=[0.5, 1.5],
			
 
				+                 remove_outside_box=False):
			
 
				+        super(Mosaic, self).__init__()
			
 
				+        self.prob = prob
			
 
				+        if isinstance(input_dim, Integral):
			
 
				+            input_dim = [input_dim, input_dim]
			
 
				+        self.input_dim = input_dim
			
 
				+        self.degrees = degrees
			
 
				+        self.translate = translate
			
 
				+        self.scale = scale
			
 
				+        self.shear = shear
			
 
				+        self.enable_mixup = enable_mixup
			
 
				+        self.mixup_prob = mixup_prob
			
 
				+        self.mixup_scale = mixup_scale
			
 
				+        self.remove_outside_box = remove_outside_box
			
 
				+
			
 
				+    def get_mosaic_coords(self, mosaic_idx, xc, yc, w, h, input_h, input_w):
			
 
				+        # (x1, y1, x2, y2) means coords in large image,
			
 
				+        # small_coords means coords in small image in mosaic aug.
			
 
				+        if mosaic_idx == 0:
			
 
				+            # top left
			
 
				+            x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
			
 
				+            small_coords = w - (x2 - x1), h - (y2 - y1), w, h
			
 
				+        elif mosaic_idx == 1:
			
 
				+            # top right
			
 
				+            x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
			
 
				+            small_coords = 0, h - (y2 - y1), min(w, x2 - x1), h
			
 
				+        elif mosaic_idx == 2:
			
 
				+            # bottom left
			
 
				+            x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
			
 
				+            small_coords = w - (x2 - x1), 0, w, min(y2 - y1, h)
			
 
				+        elif mosaic_idx == 3:
			
 
				+            # bottom right
			
 
				+            x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2,
			
 
				+                                                                   yc + h)
			
 
				+            small_coords = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
			
 
				+
			
 
				+        return (x1, y1, x2, y2), small_coords
			
 
				+
			
 
				+    def random_affine_augment(self,
			
 
				+                              img,
			
 
				+                              labels=[],
			
 
				+                              input_dim=[640, 640],
			
 
				+                              degrees=[-10, 10],
			
 
				+                              scales=[0.1, 2],
			
 
				+                              shears=[-2, 2],
			
 
				+                              translates=[-0.1, 0.1]):
			
 
				+        # random rotation and scale
			
 
				+        degree = random.uniform(degrees[0], degrees[1])
			
 
				+        scale = random.uniform(scales[0], scales[1])
			
 
				+        assert scale > 0, "Argument scale should be positive."
			
 
				+        R = cv2.getRotationMatrix2D(angle=degree, center=(0, 0), scale=scale)
			
 
				+        M = np.ones([2, 3])
			
 
				+
			
 
				+        # random shear
			
 
				+        shear = random.uniform(shears[0], shears[1])
			
 
				+        shear_x = math.tan(shear * math.pi / 180)
			
 
				+        shear_y = math.tan(shear * math.pi / 180)
			
 
				+        M[0] = R[0] + shear_y * R[1]
			
 
				+        M[1] = R[1] + shear_x * R[0]
			
 
				+
			
 
				+        # random translation
			
 
				+        translate = random.uniform(translates[0], translates[1])
			
 
				+        translation_x = translate * input_dim[0]
			
 
				+        translation_y = translate * input_dim[1]
			
 
				+        M[0, 2] = translation_x
			
 
				+        M[1, 2] = translation_y
			
 
				+
			
 
				+        # warpAffine
			
 
				+        img = cv2.warpAffine(
			
 
				+            img, M, dsize=tuple(input_dim), borderValue=(114, 114, 114))
			
 
				+
			
 
				+        num_gts = len(labels)
			
 
				+        if num_gts > 0:
			
 
				+            # warp corner points
			
 
				+            corner_points = np.ones((4 * num_gts, 3))
			
 
				+            corner_points[:, :2] = labels[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
			
 
				+                4 * num_gts, 2)  # x1y1, x2y2, x1y2, x2y1
			
 
				+            # apply affine transform
			
 
				+            corner_points = corner_points @M.T
			
 
				+            corner_points = corner_points.reshape(num_gts, 8)
			
 
				+
			
 
				+            # create new boxes
			
 
				+            corner_xs = corner_points[:, 0::2]
			
 
				+            corner_ys = corner_points[:, 1::2]
			
 
				+            new_bboxes = np.concatenate((corner_xs.min(1), corner_ys.min(1),
			
 
				+                                         corner_xs.max(1), corner_ys.max(1)))
			
 
				+            new_bboxes = new_bboxes.reshape(4, num_gts).T
			
 
				+
			
 
				+            # clip boxes
			
 
				+            new_bboxes[:, 0::2] = np.clip(new_bboxes[:, 0::2], 0, input_dim[0])
			
 
				+            new_bboxes[:, 1::2] = np.clip(new_bboxes[:, 1::2], 0, input_dim[1])
			
 
				+            labels[:, :4] = new_bboxes
			
 
				+
			
 
				+        return img, labels
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        if not isinstance(sample, Sequence):
			
 
				+            return sample
			
 
				+
			
 
				+        assert len(
			
 
				+            sample) == 5, "Mosaic needs 5 samples, 4 for mosaic and 1 for mixup."
			
 
				+        if np.random.uniform(0., 1.) > self.prob:
			
 
				+            return sample[0]
			
 
				+
			
 
				+        mosaic_gt_bbox, mosaic_gt_class, mosaic_is_crowd, mosaic_difficult = [], [], [], []
			
 
				+        input_h, input_w = self.input_dim
			
 
				+        yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
			
 
				+        xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
			
 
				+        mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)
			
 
				+
			
 
				+        # 1. get mosaic coords
			
 
				+        for mosaic_idx, sp in enumerate(sample[:4]):
			
 
				+            img = sp['image']
			
 
				+            gt_bbox = sp['gt_bbox']
			
 
				+            h0, w0 = img.shape[:2]
			
 
				+            scale = min(1. * input_h / h0, 1. * input_w / w0)
			
 
				+            img = cv2.resize(
			
 
				+                img, (int(w0 * scale), int(h0 * scale)),
			
 
				+                interpolation=cv2.INTER_LINEAR)
			
 
				+            (h, w, c) = img.shape[:3]
			
 
				+
			
 
				+            # suffix l means large image, while s means small image in mosaic aug.
			
 
				+            (l_x1, l_y1, l_x2, l_y2), (
			
 
				+                s_x1, s_y1, s_x2, s_y2) = self.get_mosaic_coords(
			
 
				+                    mosaic_idx, xc, yc, w, h, input_h, input_w)
			
 
				+
			
 
				+            mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
			
 
				+            padw, padh = l_x1 - s_x1, l_y1 - s_y1
			
 
				+
			
 
				+            # Normalized xywh to pixel xyxy format
			
 
				+            _gt_bbox = gt_bbox.copy()
			
 
				+            if len(gt_bbox) > 0:
			
 
				+                _gt_bbox[:, 0] = scale * gt_bbox[:, 0] + padw
			
 
				+                _gt_bbox[:, 1] = scale * gt_bbox[:, 1] + padh
			
 
				+                _gt_bbox[:, 2] = scale * gt_bbox[:, 2] + padw
			
 
				+                _gt_bbox[:, 3] = scale * gt_bbox[:, 3] + padh
			
 
				+
			
 
				+            mosaic_gt_bbox.append(_gt_bbox)
			
 
				+            mosaic_gt_class.append(sp['gt_class'])
			
 
				+            if 'is_crowd' in sp:
			
 
				+                mosaic_is_crowd.append(sp['is_crowd'])
			
 
				+            if 'difficult' in sp:
			
 
				+                mosaic_difficult.append(sp['difficult'])
			
 
				+
			
 
				+        # 2. clip bbox and get mosaic_labels([gt_bbox, gt_class, is_crowd])
			
 
				+        if len(mosaic_gt_bbox):
			
 
				+            mosaic_gt_bbox = np.concatenate(mosaic_gt_bbox, 0)
			
 
				+            mosaic_gt_class = np.concatenate(mosaic_gt_class, 0)
			
 
				+            if mosaic_is_crowd:
			
 
				+                mosaic_is_crowd = np.concatenate(mosaic_is_crowd, 0)
			
 
				+                mosaic_labels = np.concatenate([
			
 
				+                    mosaic_gt_bbox,
			
 
				+                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
			
 
				+                    mosaic_is_crowd.astype(mosaic_gt_bbox.dtype)
			
 
				+                ], 1)
			
 
				+            elif mosaic_difficult:
			
 
				+                mosaic_difficult = np.concatenate(mosaic_difficult, 0)
			
 
				+                mosaic_labels = np.concatenate([
			
 
				+                    mosaic_gt_bbox,
			
 
				+                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
			
 
				+                    mosaic_difficult.astype(mosaic_gt_bbox.dtype)
			
 
				+                ], 1)
			
 
				+            else:
			
 
				+                mosaic_labels = np.concatenate([
			
 
				+                    mosaic_gt_bbox, mosaic_gt_class.astype(mosaic_gt_bbox.dtype)
			
 
				+                ], 1)
			
 
				+            if self.remove_outside_box:
			
 
				+                # for MOT dataset
			
 
				+                flag1 = mosaic_gt_bbox[:, 0] < 2 * input_w
			
 
				+                flag2 = mosaic_gt_bbox[:, 2] > 0
			
 
				+                flag3 = mosaic_gt_bbox[:, 1] < 2 * input_h
			
 
				+                flag4 = mosaic_gt_bbox[:, 3] > 0
			
 
				+                flag_all = flag1 * flag2 * flag3 * flag4
			
 
				+                mosaic_labels = mosaic_labels[flag_all]
			
 
				+            else:
			
 
				+                mosaic_labels[:, 0] = np.clip(mosaic_labels[:, 0], 0,
			
 
				+                                              2 * input_w)
			
 
				+                mosaic_labels[:, 1] = np.clip(mosaic_labels[:, 1], 0,
			
 
				+                                              2 * input_h)
			
 
				+                mosaic_labels[:, 2] = np.clip(mosaic_labels[:, 2], 0,
			
 
				+                                              2 * input_w)
			
 
				+                mosaic_labels[:, 3] = np.clip(mosaic_labels[:, 3], 0,
			
 
				+                                              2 * input_h)
			
 
				+        else:
			
 
				+            mosaic_labels = np.zeros((1, 6))
			
 
				+
			
 
				+        # 3. random_affine augment
			
 
				+        mosaic_img, mosaic_labels = self.random_affine_augment(
			
 
				+            mosaic_img,
			
 
				+            mosaic_labels,
			
 
				+            input_dim=self.input_dim,
			
 
				+            degrees=self.degrees,
			
 
				+            translates=self.translate,
			
 
				+            scales=self.scale,
			
 
				+            shears=self.shear)
			
 
				+
			
 
				+        # 4. Mixup augment as copypaste, https://arxiv.org/abs/2012.07177
			
 
				+        # optinal, not used(enable_mixup=False) in tiny/nano
			
 
				+        if (self.enable_mixup and not len(mosaic_labels) == 0 and
			
 
				+                random.random() < self.mixup_prob):
			
 
				+            sample_mixup = sample[4]
			
 
				+            mixup_img = sample_mixup['image']
			
 
				+            if 'is_crowd' in sample_mixup:
			
 
				+                cp_labels = np.concatenate([
			
 
				+                    sample_mixup['gt_bbox'],
			
 
				+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
			
 
				+                    sample_mixup['is_crowd'].astype(mosaic_labels.dtype)
			
 
				+                ], 1)
			
 
				+            elif 'difficult' in sample_mixup:
			
 
				+                cp_labels = np.concatenate([
			
 
				+                    sample_mixup['gt_bbox'],
			
 
				+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
			
 
				+                    sample_mixup['difficult'].astype(mosaic_labels.dtype)
			
 
				+                ], 1)
			
 
				+            else:
			
 
				+                cp_labels = np.concatenate([
			
 
				+                    sample_mixup['gt_bbox'],
			
 
				+                    sample_mixup['gt_class'].astype(mosaic_labels.dtype)
			
 
				+                ], 1)
			
 
				+            mosaic_img, mosaic_labels = self.mixup_augment(
			
 
				+                mosaic_img, mosaic_labels, self.input_dim, cp_labels, mixup_img)
			
 
				+
			
 
				+        sample0 = sample[0]
			
 
				+        sample0['image'] = mosaic_img.astype(np.uint8)  # can not be float32
			
 
				+        sample0['h'] = float(mosaic_img.shape[0])
			
 
				+        sample0['w'] = float(mosaic_img.shape[1])
			
 
				+        sample0['im_shape'][0] = sample0['h']
			
 
				+        sample0['im_shape'][1] = sample0['w']
			
 
				+        sample0['gt_bbox'] = mosaic_labels[:, :4].astype(np.float32)
			
 
				+        sample0['gt_class'] = mosaic_labels[:, 4:5].astype(np.float32)
			
 
				+        if 'is_crowd' in sample[0]:
			
 
				+            sample0['is_crowd'] = mosaic_labels[:, 5:6].astype(np.float32)
			
 
				+        if 'difficult' in sample[0]:
			
 
				+            sample0['difficult'] = mosaic_labels[:, 5:6].astype(np.float32)
			
 
				+        return sample0
			
 
				+
			
 
    def mixup_augment(self, origin_img, origin_labels, input_dim, cp_labels,
                      img):
        """Blend a second image into ``origin_img`` and append its labels.

        ``img`` is letterboxed onto an ``input_dim`` canvas, rescaled by a
        random jitter factor, optionally flipped horizontally, padded/cropped
        to the size of ``origin_img`` and finally averaged with it (50/50).
        The boxes in ``cp_labels`` go through the same scale/flip/crop.

        Args:
            origin_img (np.ndarray): image that receives the blend; only its
                first two dimensions are read as (height, width).
            origin_labels (np.ndarray): 2-D label array of ``origin_img``; the
                transformed ``cp_labels`` rows are stacked below it, so both
                must have the same number of columns.
            input_dim (sequence): (height, width) of the letterbox canvas.
            cp_labels (np.ndarray): labels of ``img``; columns 0:4 are read as
                box coordinates (x at even, y at odd positions), column 4 as
                the class and, if there are 6 columns, column 5 as an extra
                per-box flag.
            img (np.ndarray): image to mix in.

        Returns:
            tuple: (blended image as uint8, stacked label array).
        """
        # Both random draws happen up front: scale jitter, then flip decision.
        jit_factor = random.uniform(*self.mixup_scale)
        FLIP = random.uniform(0, 1) > 0.5
        # Canvas pre-filled with the constant 114.
        if len(img.shape) == 3:
            cp_img = np.ones(
                (input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
        else:
            # NOTE(review): the flip below indexes three axes and padded_img
            # is always 3-channel, so a 2-D `img` looks unsupported further
            # down -- confirm whether this branch is reachable.
            cp_img = np.ones(input_dim, dtype=np.uint8) * 114

        # Aspect-preserving scale that fits `img` inside the canvas.
        cp_scale_ratio = min(input_dim[0] / img.shape[0],
                             input_dim[1] / img.shape[1])
        resized_img = cv2.resize(
            img, (int(img.shape[1] * cp_scale_ratio),
                  int(img.shape[0] * cp_scale_ratio)),
            interpolation=cv2.INTER_LINEAR)

        # Paste the resized image into the top-left corner of the canvas.
        cp_img[:int(img.shape[0] * cp_scale_ratio), :int(img.shape[
            1] * cp_scale_ratio)] = resized_img

        # Apply the scale jitter to the whole canvas and fold it into the
        # ratio that is later applied to the boxes.
        cp_img = cv2.resize(cp_img, (int(cp_img.shape[1] * jit_factor),
                                     int(cp_img.shape[0] * jit_factor)))
        cp_scale_ratio *= jit_factor

        if FLIP:
            # Horizontal flip (reverse the width axis).
            cp_img = cp_img[:, ::-1, :]

        origin_h, origin_w = cp_img.shape[:2]
        target_h, target_w = origin_img.shape[:2]
        # Zero-filled buffer at least as large as both images, so a
        # target-sized window can always be cut out of it.
        padded_img = np.zeros(
            (max(origin_h, target_h), max(origin_w, target_w), 3),
            dtype=np.uint8)
        padded_img[:origin_h, :origin_w] = cp_img

        # Random crop offset, drawn only along axes where the buffer is
        # larger than the target (random.randint is inclusive on both ends).
        x_offset, y_offset = 0, 0
        if padded_img.shape[0] > target_h:
            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
        if padded_img.shape[1] > target_w:
            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
        padded_cropped_img = padded_img[y_offset:y_offset + target_h, x_offset:
                                        x_offset + target_w]

        # adjust boxes
        # Scale the boxes like the image and clip them to the jittered canvas.
        cp_bboxes_origin_np = cp_labels[:, :4].copy()
        cp_bboxes_origin_np[:, 0::2] = np.clip(cp_bboxes_origin_np[:, 0::2] *
                                               cp_scale_ratio, 0, origin_w)
        cp_bboxes_origin_np[:, 1::2] = np.clip(cp_bboxes_origin_np[:, 1::2] *
                                               cp_scale_ratio, 0, origin_h)

        if FLIP:
            # Mirror the x coordinates; reversing the two x columns keeps the
            # smaller value in the first position after mirroring.
            cp_bboxes_origin_np[:, 0::2] = (
                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1])
        cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
        if self.remove_outside_box:
            # for MOT dataset
            # Only shift by the crop offset; boxes that end up outside the
            # target are dropped further below instead of being clipped.
            cp_bboxes_transformed_np[:, 0::2] -= x_offset
            cp_bboxes_transformed_np[:, 1::2] -= y_offset
        else:
            # Shift by the crop offset and clip to the target image.
            cp_bboxes_transformed_np[:, 0::2] = np.clip(
                cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
            cp_bboxes_transformed_np[:, 1::2] = np.clip(
                cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)

        # Re-assemble label rows in the same column layout as `cp_labels`.
        cls_labels = cp_labels[:, 4:5].copy()
        box_labels = cp_bboxes_transformed_np
        if cp_labels.shape[-1] == 6:
            crd_labels = cp_labels[:, 5:6].copy()
            labels = np.hstack((box_labels, cls_labels, crd_labels))
        else:
            labels = np.hstack((box_labels, cls_labels))
        if self.remove_outside_box:
            # Keep only boxes that still overlap the target image area.
            labels = labels[labels[:, 0] < target_w]
            labels = labels[labels[:, 2] > 0]
            labels = labels[labels[:, 1] < target_h]
            labels = labels[labels[:, 3] > 0]

        origin_labels = np.vstack((origin_labels, labels))
        # Blend in float32 to avoid uint8 overflow, then cast back.
        origin_img = origin_img.astype(np.float32)
        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(
            np.float32)

        return origin_img.astype(np.uint8), origin_labels
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadResize(BaseOperator):
			
 
				+    """ PadResize for image and gt_bbbox
			
 
				+
			
 
				+    Args:
			
 
				+        target_size (list[int]): input shape
			
 
				+        fill_value (float): pixel value of padded image
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, target_size, fill_value=114):
			
 
				+        super(PadResize, self).__init__()
			
 
				+        if isinstance(target_size, Integral):
			
 
				+            target_size = [target_size, target_size]
			
 
				+        self.target_size = target_size
			
 
				+        self.fill_value = fill_value
			
 
				+
			
 
				+    def _resize(self, img, bboxes, labels):
			
 
				+        ratio = min(self.target_size[0] / img.shape[0],
			
 
				+                    self.target_size[1] / img.shape[1])
			
 
				+        w, h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
			
 
				+        resized_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
			
 
				+
			
 
				+        if len(bboxes) > 0:
			
 
				+            bboxes *= ratio
			
 
				+            mask = np.minimum(bboxes[:, 2] - bboxes[:, 0],
			
 
				+                              bboxes[:, 3] - bboxes[:, 1]) > 1
			
 
				+            bboxes = bboxes[mask]
			
 
				+            labels = labels[mask]
			
 
				+        return resized_img, bboxes, labels
			
 
				+
			
 
				+    def _pad(self, img):
			
 
				+        h, w, _ = img.shape
			
 
				+        if h == self.target_size[0] and w == self.target_size[1]:
			
 
				+            return img
			
 
				+        padded_img = np.full(
			
 
				+            (self.target_size[0], self.target_size[1], 3),
			
 
				+            self.fill_value,
			
 
				+            dtype=np.uint8)
			
 
				+        padded_img[:h, :w] = img
			
 
				+        return padded_img
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        image = sample['image']
			
 
				+        bboxes = sample['gt_bbox']
			
 
				+        labels = sample['gt_class']
			
 
				+        image, bboxes, labels = self._resize(image, bboxes, labels)
			
 
				+        sample['image'] = self._pad(image).astype(np.float32)
			
 
				+        sample['gt_bbox'] = bboxes
			
 
				+        sample['gt_class'] = labels
			
 
				+        return sample
			
--- a/paddlers/models/ppdet/data/transform/rotated_operators.py
+++ b/paddlers/models/ppdet/data/transform/rotated_operators.py
@@ -0,0 +1,479 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import print_function
			
 
				+from __future__ import division
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+from numbers import Number, Integral
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import math
			
 
				+import copy
			
 
				+
			
 
				+from .operators import register_op, BaseOperator
			
 
				+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RRotate(BaseOperator):
			
 
				+    """ Rotate Image, Polygon, Box
			
 
				+
			
 
				+    Args:
			
 
				+        scale (float): rotate scale
			
 
				+        angle (float): rotate angle
			
 
				+        fill_value (int, tuple): fill color
			
 
				+        auto_bound (bool): whether auto bound or not
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
			
 
				+        super(RRotate, self).__init__()
			
 
				+        self.scale = scale
			
 
				+        self.angle = angle
			
 
				+        self.fill_value = fill_value
			
 
				+        self.auto_bound = auto_bound
			
 
				+
			
 
				+    def get_rotated_matrix(self, angle, scale, h, w):
			
 
				+        center = ((w - 1) * 0.5, (h - 1) * 0.5)
			
 
				+        matrix = cv2.getRotationMatrix2D(center, -angle, scale)
			
 
				+        # calculate the new size
			
 
				+        cos = np.abs(matrix[0, 0])
			
 
				+        sin = np.abs(matrix[0, 1])
			
 
				+        new_w = h * sin + w * cos
			
 
				+        new_h = h * cos + w * sin
			
 
				+        # calculate offset
			
 
				+        n_w = int(np.round(new_w))
			
 
				+        n_h = int(np.round(new_h))
			
 
				+        if self.auto_bound:
			
 
				+            ratio = min(w / n_w, h / n_h)
			
 
				+            matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
			
 
				+        else:
			
 
				+            matrix[0, 2] += (new_w - w) * 0.5
			
 
				+            matrix[1, 2] += (new_h - h) * 0.5
			
 
				+            w = n_w
			
 
				+            h = n_h
			
 
				+        return matrix, h, w
			
 
				+
			
 
				+    def get_rect_from_pts(self, pts, h, w):
			
 
				+        """ get minimum rectangle of points
			
 
				+        """
			
 
				+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
			
 
				+        min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
			
 
				+                                                            axis=1)
			
 
				+        max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
			
 
				+                                                            axis=1)
			
 
				+        min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
			
 
				+        max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
			
 
				+        boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
			
 
				+        return boxes
			
 
				+
			
 
				+    def apply_image(self, image, matrix, h, w):
			
 
				+        return cv2.warpAffine(
			
 
				+            image, matrix, (w, h), borderValue=self.fill_value)
			
 
				+
			
 
				+    def apply_pts(self, pts, matrix, h, w):
			
 
				+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
			
 
				+        # n is number of samples and m is two times the number of points due to (x, y)
			
 
				+        _, m = pts.shape
			
 
				+        # transpose points
			
 
				+        pts_ = pts.reshape(-1, 2).T
			
 
				+        # pad 1 to convert the points to homogeneous coordinates
			
 
				+        padding = np.ones((1, pts_.shape[1]), pts.dtype)
			
 
				+        rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
			
 
				+        return rotated_pts[:2, :].T.reshape(-1, m)
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        image = sample['image']
			
 
				+        h, w = image.shape[:2]
			
 
				+        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
			
 
				+        sample['image'] = self.apply_image(image, matrix, h, w)
			
 
				+        polys = sample['gt_poly']
			
 
				+        # TODO: segment or keypoint to be processed 
			
 
				+        if len(polys) > 0:
			
 
				+            pts = self.apply_pts(polys, matrix, h, w)
			
 
				+            sample['gt_poly'] = pts
			
 
				+            sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomRRotate(BaseOperator):
			
 
				+    """ Random Rotate Image
			
 
				+    Args:
			
 
				+        scale (float, tuple, list): rotate scale
			
 
				+        scale_mode (str): mode of scale, [range, value, None]
			
 
				+        angle (float, tuple, list): rotate angle
			
 
				+        angle_mode (str): mode of angle, [range, value, None]
			
 
				+        fill_value (float, tuple, list): fill value
			
 
				+        rotate_prob (float): probability of rotation
			
 
				+        auto_bound (bool): whether auto bound or not
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 scale=1.0,
			
 
				+                 scale_mode=None,
			
 
				+                 angle=0.,
			
 
				+                 angle_mode=None,
			
 
				+                 fill_value=0.,
			
 
				+                 rotate_prob=1.0,
			
 
				+                 auto_bound=True):
			
 
				+        super(RandomRRotate, self).__init__()
			
 
				+        self.scale = scale
			
 
				+        self.scale_mode = scale_mode
			
 
				+        self.angle = angle
			
 
				+        self.angle_mode = angle_mode
			
 
				+        self.fill_value = fill_value
			
 
				+        self.rotate_prob = rotate_prob
			
 
				+        self.auto_bound = auto_bound
			
 
				+
			
 
				+    def get_angle(self, angle, angle_mode):
			
 
				+        assert not angle_mode or angle_mode in [
			
 
				+            'range', 'value'
			
 
				+        ], 'angle mode should be in [range, value, None]'
			
 
				+        if not angle_mode:
			
 
				+            return angle
			
 
				+        elif angle_mode == 'range':
			
 
				+            low, high = angle
			
 
				+            return np.random.rand() * (high - low) + low
			
 
				+        elif angle_mode == 'value':
			
 
				+            return np.random.choice(angle)
			
 
				+
			
 
				+    def get_scale(self, scale, scale_mode):
			
 
				+        assert not scale_mode or scale_mode in [
			
 
				+            'range', 'value'
			
 
				+        ], 'scale mode should be in [range, value, None]'
			
 
				+        if not scale_mode:
			
 
				+            return scale
			
 
				+        elif scale_mode == 'range':
			
 
				+            low, high = scale
			
 
				+            return np.random.rand() * (high - low) + low
			
 
				+        elif scale_mode == 'value':
			
 
				+            return np.random.choice(scale)
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        if np.random.rand() > self.rotate_prob:
			
 
				+            return sample
			
 
				+
			
 
				+        angle = self.get_angle(self.angle, self.angle_mode)
			
 
				+        scale = self.get_scale(self.scale, self.scale_mode)
			
 
				+        rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
			
 
				+        return rotator(sample)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Poly2RBox(BaseOperator):
			
 
				+    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1
			
 
				+
			
 
				+    Args:
			
 
				+        filter_threshold (int, float): threshold to filter annotations
			
 
				+        filter_mode (str): filter mode, ['area', 'edge']
			
 
				+        rbox_type (str): rbox type, ['le135', 'oc']
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
			
 
				+        super(Poly2RBox, self).__init__()
			
 
				+        self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
			
 
				+        self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np
			
 
				+
			
 
				+    def filter(self, size, threshold, mode):
			
 
				+        if mode == 'area':
			
 
				+            if size[0] * size[1] < threshold:
			
 
				+                return True
			
 
				+        elif mode == 'edge':
			
 
				+            if min(size) < threshold:
			
 
				+                return True
			
 
				+        return False
			
 
				+
			
 
				+    def get_rbox(self, polys):
			
 
				+        valid_ids, rboxes, bboxes = [], [], []
			
 
				+        for i, poly in enumerate(polys):
			
 
				+            cx, cy, w, h, angle = self.rbox_fn(poly)
			
 
				+            if self.filter_fn((w, h)):
			
 
				+                continue
			
 
				+            rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
			
 
				+            valid_ids.append(i)
			
 
				+            xmin, ymin = min(poly[0::2]), min(poly[1::2])
			
 
				+            xmax, ymax = max(poly[0::2]), max(poly[1::2])
			
 
				+            bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
			
 
				+
			
 
				+        if len(valid_ids) == 0:
			
 
				+            rboxes = np.zeros((0, 5), dtype=np.float32)
			
 
				+            bboxes = np.zeros((0, 4), dtype=np.float32)
			
 
				+        else:
			
 
				+            rboxes = np.stack(rboxes)
			
 
				+            bboxes = np.stack(bboxes)
			
 
				+
			
 
				+        return rboxes, bboxes, valid_ids
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
			
 
				+        sample['gt_rbox'] = rboxes
			
 
				+        sample['gt_bbox'] = bboxes
			
 
				+        for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
			
 
				+            if k in sample:
			
 
				+                sample[k] = sample[k][valid_ids]
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Poly2Array(BaseOperator):
			
 
				+    """ convert gt_poly to np.array for rotated bboxes
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(Poly2Array, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        if 'gt_poly' in sample:
			
 
				+            sample['gt_poly'] = np.array(
			
 
				+                sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RResize(BaseOperator):
			
 
				+    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
			
 
				+        """
			
 
				+        Resize image to target size. if keep_ratio is True, 
			
 
				+        resize the image's long side to the maximum of target_size
			
 
				+        if keep_ratio is False, resize the image to target size(h, w)
			
 
				+        Args:
			
 
				+            target_size (int|list): image target size
			
 
				+            keep_ratio (bool): whether keep_ratio or not, default true
			
 
				+            interp (int): the interpolation method
			
 
				+        """
			
 
				+        super(RResize, self).__init__()
			
 
				+        self.keep_ratio = keep_ratio
			
 
				+        self.interp = interp
			
 
				+        if not isinstance(target_size, (Integral, Sequence)):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        if isinstance(target_size, Integral):
			
 
				+            target_size = [target_size, target_size]
			
 
				+        self.target_size = target_size
			
 
				+
			
 
				+    def apply_image(self, image, scale):
			
 
				+        im_scale_x, im_scale_y = scale
			
 
				+
			
 
				+        return cv2.resize(
			
 
				+            image,
			
 
				+            None,
			
 
				+            None,
			
 
				+            fx=im_scale_x,
			
 
				+            fy=im_scale_y,
			
 
				+            interpolation=self.interp)
			
 
				+
			
 
				+    def apply_pts(self, pts, scale, size):
			
 
				+        im_scale_x, im_scale_y = scale
			
 
				+        resize_w, resize_h = size
			
 
				+        pts[:, 0::2] *= im_scale_x
			
 
				+        pts[:, 1::2] *= im_scale_y
			
 
				+        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
			
 
				+        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
			
 
				+        return pts
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Resize the image numpy.
			
 
				+        """
			
 
				+        im = sample['image']
			
 
				+        if not isinstance(im, np.ndarray):
			
 
				+            raise TypeError("{}: image type is not numpy.".format(self))
			
 
				+        if len(im.shape) != 3:
			
 
				+            raise ImageError('{}: image is not 3-dimensional.'.format(self))
			
 
				+
			
 
				+        # apply image
			
 
				+        im_shape = im.shape
			
 
				+        if self.keep_ratio:
			
 
				+
			
 
				+            im_size_min = np.min(im_shape[0:2])
			
 
				+            im_size_max = np.max(im_shape[0:2])
			
 
				+
			
 
				+            target_size_min = np.min(self.target_size)
			
 
				+            target_size_max = np.max(self.target_size)
			
 
				+
			
 
				+            im_scale = min(target_size_min / im_size_min,
			
 
				+                           target_size_max / im_size_max)
			
 
				+
			
 
				+            resize_h = im_scale * float(im_shape[0])
			
 
				+            resize_w = im_scale * float(im_shape[1])
			
 
				+
			
 
				+            im_scale_x = im_scale
			
 
				+            im_scale_y = im_scale
			
 
				+        else:
			
 
				+            resize_h, resize_w = self.target_size
			
 
				+            im_scale_y = resize_h / im_shape[0]
			
 
				+            im_scale_x = resize_w / im_shape[1]
			
 
				+
			
 
				+        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
			
 
				+        sample['image'] = im.astype(np.float32)
			
 
				+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
			
 
				+        if 'scale_factor' in sample:
			
 
				+            scale_factor = sample['scale_factor']
			
 
				+            sample['scale_factor'] = np.asarray(
			
 
				+                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
			
 
				+                dtype=np.float32)
			
 
				+        else:
			
 
				+            sample['scale_factor'] = np.asarray(
			
 
				+                [im_scale_y, im_scale_x], dtype=np.float32)
			
 
				+
			
 
				+        # apply bbox
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
			
 
				+                                               [im_scale_x, im_scale_y],
			
 
				+                                               [resize_w, resize_h])
			
 
				+
			
 
				+        # apply polygon
			
 
				+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
			
 
				+                                               [im_scale_x, im_scale_y],
			
 
				+                                               [resize_w, resize_h])
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomRFlip(BaseOperator):
			
 
				+    def __init__(self, prob=0.5):
			
 
				+        """
			
 
				+        Args:
			
 
				+            prob (float): the probability of flipping image
			
 
				+        """
			
 
				+        super(RandomRFlip, self).__init__()
			
 
				+        self.prob = prob
			
 
				+        if not (isinstance(self.prob, float)):
			
 
				+            raise TypeError("{}: input type is invalid.".format(self))
			
 
				+
			
 
				+    def apply_image(self, image):
			
 
				+        return image[:, ::-1, :]
			
 
				+
			
 
				+    def apply_pts(self, pts, width):
			
 
				+        oldx = pts[:, 0::2].copy()
			
 
				+        pts[:, 0::2] = width - oldx - 1
			
 
				+        return pts
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """Filp the image and bounding box.
			
 
				+        Operators:
			
 
				+            1. Flip the image numpy.
			
 
				+            2. Transform the bboxes' x coordinates.
			
 
				+              (Must judge whether the coordinates are normalized!)
			
 
				+            3. Transform the segmentations' x coordinates.
			
 
				+              (Must judge whether the coordinates are normalized!)
			
 
				+        Output:
			
 
				+            sample: the image, bounding box and segmentation part
			
 
				+                    in sample are flipped.
			
 
				+        """
			
 
				+        if np.random.uniform(0, 1) < self.prob:
			
 
				+            im = sample['image']
			
 
				+            height, width = im.shape[:2]
			
 
				+            im = self.apply_image(im)
			
 
				+            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+                sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
			
 
				+            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
			
 
				+
			
 
				+            sample['flipped'] = True
			
 
				+            sample['image'] = im
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class VisibleRBox(BaseOperator):
			
 
				+    """
			
 
				+    In debug mode, visualize images according to `gt_box`.
			
 
				+    (Currently only supported when not cropping and flipping image.)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, output_dir='debug'):
			
 
				+        super(VisibleRBox, self).__init__()
			
 
				+        self.output_dir = output_dir
			
 
				+        if not os.path.isdir(output_dir):
			
 
				+            os.makedirs(output_dir)
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        image = Image.fromarray(sample['image'].astype(np.uint8))
			
 
				+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
			
 
				+        width = sample['w']
			
 
				+        height = sample['h']
			
 
				+        # gt_poly = sample['gt_rbox']
			
 
				+        gt_poly = sample['gt_poly']
			
 
				+        gt_class = sample['gt_class']
			
 
				+        draw = ImageDraw.Draw(image)
			
 
				+        for i in range(gt_poly.shape[0]):
			
 
				+            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
			
 
				+            draw.line(
			
 
				+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
			
 
				+                width=2,
			
 
				+                fill='green')
			
 
				+            # draw label
			
 
				+            xmin = min(x1, x2, x3, x4)
			
 
				+            ymin = min(y1, y2, y3, y4)
			
 
				+            text = str(gt_class[i][0])
			
 
				+            tw, th = draw.textsize(text)
			
 
				+            draw.rectangle(
			
 
				+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
			
 
				+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
			
 
				+
			
 
				+        if 'gt_keypoint' in sample.keys():
			
 
				+            gt_keypoint = sample['gt_keypoint']
			
 
				+            if self.is_normalized:
			
 
				+                for i in range(gt_keypoint.shape[1]):
			
 
				+                    if i % 2:
			
 
				+                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
			
 
				+                    else:
			
 
				+                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
			
 
				+            for i in range(gt_keypoint.shape[0]):
			
 
				+                keypoint = gt_keypoint[i]
			
 
				+                for j in range(int(keypoint.shape[0] / 2)):
			
 
				+                    x1 = round(keypoint[2 * j]).astype(np.int32)
			
 
				+                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
			
 
				+                    draw.ellipse(
			
 
				+                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
			
 
				+        save_path = os.path.join(self.output_dir, out_file_name)
			
 
				+        image.save(save_path, quality=95)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Rbox2Poly(BaseOperator):
			
 
				+    """
			
 
				+    Convert rbbox format to poly format.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(Rbox2Poly, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        assert 'gt_rbox' in sample
			
 
				+        assert sample['gt_rbox'].shape[1] == 5
			
 
				+        rboxes = sample['gt_rbox']
			
 
				+        polys = rbox2poly_np(rboxes)
			
 
				+        sample['gt_poly'] = polys
			
 
				+        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
			
 
				+        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
			
 
				+        sample['gt_bbox'] = np.stack([xmin, ymin, xmin, ymin], axis=1)
			
 
				+        return sample
			
--- a/paddlers/models/ppdet/data/utils.py
+++ b/paddlers/models/ppdet/data/utils.py
@@ -0,0 +1,72 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import numbers
			
 
				+import numpy as np
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence, Mapping
			
 
				+except:
			
 
				+    from collections import Sequence, Mapping
			
 
				+
			
 
				+
			
 
				+def default_collate_fn(batch):
			
 
				+    """
			
 
				+    Default batch collating function for :code:`paddle.io.DataLoader`,
			
 
				+    get input data as a list of sample datas, each element in list
			
 
				+    if the data of a sample, and sample data should composed of list,
			
 
				+    dictionary, string, number, numpy array, this
			
 
				+    function will parse input data recursively and stack number,
			
 
				+    numpy array and paddle.Tensor datas as batch datas. e.g. for
			
 
				+    following input data:
			
 
				+    [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
			
 
				+     {'image': np.array(shape=[3, 224, 224]), 'label': 3},
			
 
				+     {'image': np.array(shape=[3, 224, 224]), 'label': 4},
			
 
				+     {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
			
 
				+    
			
 
				+    
			
 
				+    This default collate function zipped each number and numpy array
			
 
				+    field together and stack each field as the batch field as follows:
			
 
				+    {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
			
 
				+    Args:  
			
 
				+        batch(list of sample data): batch should be a list of sample data.
			
 
				+    
			
 
				+    Returns:
			
 
				+        Batched data: batched each number, numpy array and paddle.Tensor
			
 
				+                      in input data.
			
 
				+    """
			
 
				+    sample = batch[0]
			
 
				+    if isinstance(sample, np.ndarray):
			
 
				+        batch = np.stack(batch, axis=0)
			
 
				+        return batch
			
 
				+    elif isinstance(sample, numbers.Number):
			
 
				+        batch = np.array(batch)
			
 
				+        return batch
			
 
				+    elif isinstance(sample, (str, bytes)):
			
 
				+        return batch
			
 
				+    elif isinstance(sample, Mapping):
			
 
				+        return {
			
 
				+            key: default_collate_fn([d[key] for d in batch])
			
 
				+            for key in sample
			
 
				+        }
			
 
				+    elif isinstance(sample, Sequence):
			
 
				+        sample_fields_num = len(sample)
			
 
				+        if not all(len(sample) == sample_fields_num for sample in iter(batch)):
			
 
				+            raise RuntimeError(
			
 
				+                "fileds number not same among samples in a batch")
			
 
				+        return [default_collate_fn(fields) for fields in zip(*batch)]
			
 
				+
			
 
				+    raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
			
 
				+                    "dict, list, number, but got {}".format(type(sample)))
			
--- a/paddlers/models/ppdet/engine/__init__.py
+++ b/paddlers/models/ppdet/engine/__init__.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from . import trainer
			
--- a/paddlers/models/ppdet/engine/callbacks.py
+++ b/paddlers/models/ppdet/engine/callbacks.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -182,7 +182,7 @@ class Checkpointer(Callback):
 
				                 ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
			
 
				                     save_name = str(
			
 
				                         epoch_id) if epoch_id != end_epoch - 1 else "model_final"
			
 
				-                    weight = self.weight
			
 
				+                    weight = self.weight.state_dict()
			
 
				             elif mode == 'eval':
			
 
				                 if 'save_best_model' in status and status['save_best_model']:
			
 
				                     for metric in self.model._metrics:
			
@@ -198,15 +198,25 @@ class Checkpointer(Callback):
 
				                                         "training iterations being too few or not " \
			
 
				                                         "loading the correct weights.")
			
 
				                             return
			
 
				-                        if map_res[key][0] > self.best_ap:
			
 
				+                        if map_res[key][0] >= self.best_ap:
			
 
				                             self.best_ap = map_res[key][0]
			
 
				                             save_name = 'best_model'
			
 
				-                            weight = self.weight
			
 
				+                            weight = self.weight.state_dict()
			
 
				                         logger.info("Best test {} ap is {:0.3f}.".format(
			
 
				                             key, self.best_ap))
			
 
				             if weight:
			
 
				-                save_model(weight, self.model.optimizer, self.save_dir,
			
 
				-                           save_name, epoch_id + 1)
			
 
				+                if self.model.use_ema:
			
 
				+                    # save model and ema_model
			
 
				+                    save_model(
			
 
				+                        status['weight'],
			
 
				+                        self.model.optimizer,
			
 
				+                        self.save_dir,
			
 
				+                        save_name,
			
 
				+                        epoch_id + 1,
			
 
				+                        ema_model=weight)
			
 
				+                else:
			
 
				+                    save_model(weight, self.model.optimizer, self.save_dir,
			
 
				+                               save_name, epoch_id + 1)
			
 
				 
			
 
				 
			
 
				 class WiferFaceEval(Callback):
			
@@ -251,7 +261,7 @@ class VisualDLWriter(Callback):
 
				                 for loss_name, loss_value in training_staus.get().items():
			
 
				                     self.vdl_writer.add_scalar(loss_name, loss_value,
			
 
				                                                self.vdl_loss_step)
			
 
				-                    self.vdl_loss_step += 1
			
 
				+                self.vdl_loss_step += 1
			
 
				             elif mode == 'test':
			
 
				                 ori_image = status['original_image']
			
 
				                 result_image = status['result_image']
			
@@ -279,6 +289,157 @@ class VisualDLWriter(Callback):
 
				                 self.vdl_mAP_step += 1
			
 
				 
			
 
				 
			
 
				+class WandbCallback(Callback):
			
 
				+    def __init__(self, model):
			
 
				+        super(WandbCallback, self).__init__(model)
			
 
				+
			
 
				+        try:
			
 
				+            import wandb
			
 
				+            self.wandb = wandb
			
 
				+        except Exception as e:
			
 
				+            logger.error('wandb not found, please install wandb. '
			
 
				+                         'Use: `pip install wandb`.')
			
 
				+            raise e
			
 
				+
			
 
				+        self.wandb_params = model.cfg.get('wandb', None)
			
 
				+        self.save_dir = os.path.join(self.model.cfg.save_dir,
			
 
				+                                     self.model.cfg.filename)
			
 
				+        if self.wandb_params is None:
			
 
				+            self.wandb_params = {}
			
 
				+        for k, v in model.cfg.items():
			
 
				+            if k.startswith("wandb_"):
			
 
				+                self.wandb_params.update({k.lstrip("wandb_"): v})
			
 
				+
			
 
				+        self._run = None
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            _ = self.run
			
 
				+            self.run.config.update(self.model.cfg)
			
 
				+            self.run.define_metric("epoch")
			
 
				+            self.run.define_metric("eval/*", step_metric="epoch")
			
 
				+
			
 
				+        self.best_ap = 0
			
 
				+
			
 
				+    @property
			
 
				+    def run(self):
			
 
				+        if self._run is None:
			
 
				+            if self.wandb.run is not None:
			
 
				+                logger.info(
			
 
				+                    "There is an ongoing wandb run which will be used"
			
 
				+                    "for logging. Please use `wandb.finish()` to end that"
			
 
				+                    "if the behaviour is not intended")
			
 
				+                self._run = self.wandb.run
			
 
				+            else:
			
 
				+                self._run = self.wandb.init(**self.wandb_params)
			
 
				+        return self._run
			
 
				+
			
 
				+    def save_model(self,
			
 
				+                   optimizer,
			
 
				+                   save_dir,
			
 
				+                   save_name,
			
 
				+                   last_epoch,
			
 
				+                   ema_model=None,
			
 
				+                   ap=None,
			
 
				+                   tags=None):
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            model_path = os.path.join(save_dir, save_name)
			
 
				+            metadata = {}
			
 
				+            metadata["last_epoch"] = last_epoch
			
 
				+            if ap:
			
 
				+                metadata["ap"] = ap
			
 
				+            if ema_model is None:
			
 
				+                ema_artifact = self.wandb.Artifact(
			
 
				+                    name="ema_model-{}".format(self.run.id),
			
 
				+                    type="model",
			
 
				+                    metadata=metadata)
			
 
				+                model_artifact = self.wandb.Artifact(
			
 
				+                    name="model-{}".format(self.run.id),
			
 
				+                    type="model",
			
 
				+                    metadata=metadata)
			
 
				+
			
 
				+                ema_artifact.add_file(model_path + ".pdema", name="model_ema")
			
 
				+                model_artifact.add_file(model_path + ".pdparams", name="model")
			
 
				+
			
 
				+                self.run.log_artifact(ema_artifact, aliases=tags)
			
 
				+                self.run.log_artfact(model_artifact, aliases=tags)
			
 
				+            else:
			
 
				+                model_artifact = self.wandb.Artifact(
			
 
				+                    name="model-{}".format(self.run.id),
			
 
				+                    type="model",
			
 
				+                    metadata=metadata)
			
 
				+                model_artifact.add_file(model_path + ".pdparams", name="model")
			
 
				+                self.run.log_artifact(model_artifact, aliases=tags)
			
 
				+
			
 
				+    def on_step_end(self, status):
			
 
				+
			
 
				+        mode = status['mode']
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            if mode == 'train':
			
 
				+                training_status = status['training_staus'].get()
			
 
				+                for k, v in training_status.items():
			
 
				+                    training_status[k] = float(v)
			
 
				+                metrics = {"train/" + k: v for k, v in training_status.items()}
			
 
				+                self.run.log(metrics)
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        mode = status['mode']
			
 
				+        epoch_id = status['epoch_id']
			
 
				+        save_name = None
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            if mode == 'train':
			
 
				+                end_epoch = self.model.cfg.epoch
			
 
				+                if (
			
 
				+                        epoch_id + 1
			
 
				+                ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
			
 
				+                    save_name = str(
			
 
				+                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
			
 
				+                    tags = ["latest", "epoch_{}".format(epoch_id)]
			
 
				+                    self.save_model(
			
 
				+                        self.model.optimizer,
			
 
				+                        self.save_dir,
			
 
				+                        save_name,
			
 
				+                        epoch_id + 1,
			
 
				+                        self.model.use_ema,
			
 
				+                        tags=tags)
			
 
				+            if mode == 'eval':
			
 
				+                merged_dict = {}
			
 
				+                for metric in self.model._metrics:
			
 
				+                    for key, map_value in metric.get_results().items():
			
 
				+                        merged_dict["eval/{}-mAP".format(key)] = map_value[0]
			
 
				+                merged_dict["epoch"] = status["epoch_id"]
			
 
				+                self.run.log(merged_dict)
			
 
				+
			
 
				+                if 'save_best_model' in status and status['save_best_model']:
			
 
				+                    for metric in self.model._metrics:
			
 
				+                        map_res = metric.get_results()
			
 
				+                        if 'bbox' in map_res:
			
 
				+                            key = 'bbox'
			
 
				+                        elif 'keypoint' in map_res:
			
 
				+                            key = 'keypoint'
			
 
				+                        else:
			
 
				+                            key = 'mask'
			
 
				+                        if key not in map_res:
			
 
				+                            logger.warning("Evaluation results empty, this may be due to " \
			
 
				+                                        "training iterations being too few or not " \
			
 
				+                                        "loading the correct weights.")
			
 
				+                            return
			
 
				+                        if map_res[key][0] >= self.best_ap:
			
 
				+                            self.best_ap = map_res[key][0]
			
 
				+                            save_name = 'best_model'
			
 
				+                            tags = ["best", "epoch_{}".format(epoch_id)]
			
 
				+
			
 
				+                            self.save_model(
			
 
				+                                self.model.optimizer,
			
 
				+                                self.save_dir,
			
 
				+                                save_name,
			
 
				+                                last_epoch=epoch_id + 1,
			
 
				+                                ema_model=self.model.use_ema,
			
 
				+                                ap=self.best_ap,
			
 
				+                                tags=tags)
			
 
				+
			
 
				+    def on_train_end(self, status):
			
 
				+        self.run.finish()
			
 
				+
			
 
				+
			
 
				 class SniperProposalsGenerator(Callback):
			
 
				     def __init__(self, model):
			
 
				         super(SniperProposalsGenerator, self).__init__(model)
			
--- a/paddlers/models/ppdet/engine/env.py
+++ b/paddlers/models/ppdet/engine/env.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/engine/export_utils.py
+++ b/paddlers/models/ppdet/engine/export_utils.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -41,22 +41,26 @@ TRT_MIN_SUBGRAPH = {
 
				     'HigherHRNet': 3,
			
 
				     'HRNet': 3,
			
 
				     'DeepSORT': 3,
			
 
				+    'ByteTrack': 10,
			
 
				     'JDE': 10,
			
 
				     'FairMOT': 5,
			
 
				     'GFL': 16,
			
 
				     'PicoDet': 3,
			
 
				     'CenterNet': 5,
			
 
				     'TOOD': 5,
			
 
				+    'YOLOX': 8,
			
 
				 }
			
 
				 
			
 
				 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
			
 
				-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
			
 
				+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
			
 
				 
			
 
				 
			
 
				 def _prune_input_spec(input_spec, program, targets):
			
 
				     # try to prune static program to figure out pruned input spec
			
 
				     # so we perform following operations in static mode
			
 
				+    device = paddle.get_device()
			
 
				     paddle.enable_static()
			
 
				+    paddle.set_device(device)
			
 
				     pruned_input_spec = [{}]
			
 
				     program = program.clone()
			
 
				     program = program._prune(targets=targets)
			
@@ -67,7 +71,7 @@ def _prune_input_spec(input_spec, program, targets):
 
				             pruned_input_spec[0][name] = spec
			
 
				         except Exception:
			
 
				             pass
			
 
				-    paddle.disable_static()
			
 
				+    paddle.disable_static(place=device)
			
 
				     return pruned_input_spec
			
 
				 
			
 
				 
			
@@ -88,6 +92,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
 
				             if key == 'Resize':
			
 
				                 if int(image_shape[1]) != -1:
			
 
				                     value['target_size'] = image_shape[1:]
			
 
				+                value['interp'] = value.get('interp', 1)  # cv2.INTER_LINEAR
			
 
				             if fuse_normalize and key == 'NormalizeImage':
			
 
				                 continue
			
 
				             p.update(value)
			
@@ -120,12 +125,20 @@ def _dump_infer_config(config, path, image_shape, model):
 
				     setup_orderdict()
			
 
				     use_dynamic_shape = True if image_shape[2] == -1 else False
			
 
				     infer_cfg = OrderedDict({
			
 
				-        'mode': 'fluid',
			
 
				+        'mode': 'paddle',
			
 
				         'draw_threshold': 0.5,
			
 
				         'metric': config['metric'],
			
 
				         'use_dynamic_shape': use_dynamic_shape
			
 
				     })
			
 
				+    export_onnx = config.get('export_onnx', False)
			
 
				+    export_eb = config.get('export_eb', False)
			
 
				+
			
 
				     infer_arch = config['architecture']
			
 
				+    if 'RCNN' in infer_arch and export_onnx:
			
 
				+        logger.warning(
			
 
				+            "Exporting RCNN model to ONNX only support batch_size = 1")
			
 
				+        infer_cfg['export_onnx'] = True
			
 
				+        infer_cfg['export_eb'] = export_eb
			
 
				 
			
 
				     if infer_arch in MOT_ARCH:
			
 
				         if infer_arch == 'DeepSORT':
			
@@ -140,6 +153,12 @@ def _dump_infer_config(config, path, image_shape, model):
 
				             infer_cfg['min_subgraph_size'] = min_subgraph_size
			
 
				             arch_state = True
			
 
				             break
			
 
				+
			
 
				+    if infer_arch == 'YOLOX':
			
 
				+        infer_cfg['arch'] = infer_arch
			
 
				+        infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
			
 
				+        arch_state = True
			
 
				+
			
 
				     if not arch_state:
			
 
				         logger.error(
			
 
				             'Architecture: {} is not supported for exporting model now.\n'.
			
@@ -165,12 +184,17 @@ def _dump_infer_config(config, path, image_shape, model):
 
				         reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
			
 
				 
			
 
				     if infer_arch == 'PicoDet':
			
 
				-        infer_cfg['NMS'] = config['PicoHead']['nms']
			
 
				-        # In order to speed up the prediction, the threshold of nms
			
 
				+        if hasattr(config, 'export') and config['export'].get(
			
 
				+                'post_process',
			
 
				+                False) and not config['export'].get('benchmark', False):
			
 
				+            infer_cfg['arch'] = 'GFL'
			
 
				+        head_name = 'PicoHeadV2' if config['PicoHeadV2'] else 'PicoHead'
			
 
				+        infer_cfg['NMS'] = config[head_name]['nms']
			
 
				+        # In order to speed up the prediction, the threshold of nms 
			
 
				         # is adjusted here, which can be changed in infer_cfg.yml
			
 
				-        config['PicoHead']['nms']["score_threshold"] = 0.3
			
 
				-        config['PicoHead']['nms']["nms_threshold"] = 0.5
			
 
				-        infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']
			
 
				+        config[head_name]['nms']["score_threshold"] = 0.3
			
 
				+        config[head_name]['nms']["nms_threshold"] = 0.5
			
 
				+        infer_cfg['fpn_stride'] = config[head_name]['fpn_stride']
			
 
				 
			
 
				     yaml.dump(infer_cfg, open(path, 'w'))
			
 
				     logger.info("Export inference config file to {}".format(os.path.join(path)))
			
--- a/paddlers/models/ppdet/engine/tracker.py
+++ b/paddlers/models/ppdet/engine/tracker.py
@@ -17,27 +17,33 @@ from __future__ import division
 
				 from __future__ import print_function
			
 
				 
			
 
				 import os
			
 
				-import cv2
			
 
				 import glob
			
 
				 import re
			
 
				 import paddle
			
 
				+import paddle.nn as nn
			
 
				 import numpy as np
			
 
				-import os.path as osp
			
 
				+from tqdm import tqdm
			
 
				 from collections import defaultdict
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import create
			
 
				 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
			
 
				 from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
			
 
				 from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
			
 
				-
			
 
				-from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric
			
 
				-from paddlers.models.ppdet.metrics import MCMOTMetric
			
 
				+from paddlers.models.ppdet.modeling.mot.tracker import JDETracker, DeepSORTTracker, OCSORTTracker
			
 
				+from paddlers.models.ppdet.modeling.architectures import YOLOX
			
 
				+from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric, MCMOTMetric
			
 
				+import paddlers.models.ppdet.utils.stats as stats
			
 
				 
			
 
				 from .callbacks import Callback, ComposeCallback
			
 
				 
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger(__name__)
			
 
				 
			
 
				+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
			
 
				+MOT_ARCH_JDE = ['JDE', 'FairMOT']
			
 
				+MOT_ARCH_SDE = ['DeepSORT', 'ByteTrack']
			
 
				+MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti']
			
 
				+
			
 
				 __all__ = ['Tracker']
			
 
				 
			
 
				 
			
@@ -55,6 +61,12 @@ class Tracker(object):
 
				         # build model
			
 
				         self.model = create(cfg.architecture)
			
 
				 
			
 
				+        if isinstance(self.model.detector, YOLOX):
			
 
				+            for k, m in self.model.named_sublayers():
			
 
				+                if isinstance(m, nn.BatchNorm2D):
			
 
				+                    m._epsilon = 1e-3  # for amp(fp16)
			
 
				+                    m._momentum = 0.97  # 0.03 in pytorch
			
 
				+
			
 
				         self.status = {}
			
 
				         self.start_epoch = 0
			
 
				 
			
@@ -108,11 +120,15 @@ class Tracker(object):
 
				         load_weight(self.model, weights, self.optimizer)
			
 
				 
			
 
				     def load_weights_sde(self, det_weights, reid_weights):
			
 
				-        if self.model.detector:
			
 
				+        with_detector = self.model.detector is not None
			
 
				+        with_reid = self.model.reid is not None
			
 
				+
			
 
				+        if with_detector:
			
 
				             load_weight(self.model.detector, det_weights)
			
 
				-            load_weight(self.model.reid, reid_weights)
			
 
				+            if with_reid:
			
 
				+                load_weight(self.model.reid, reid_weights)
			
 
				         else:
			
 
				-            load_weight(self.model.reid, reid_weights, self.optimizer)
			
 
				+            load_weight(self.model.reid, reid_weights)
			
 
				 
			
 
				     def _eval_seq_jde(self,
			
 
				                       dataloader,
			
@@ -131,11 +147,8 @@ class Tracker(object):
 
				         self.model.eval()
			
 
				         results = defaultdict(list)  # support single class and multi classes
			
 
				 
			
 
				-        for step_id, data in enumerate(dataloader):
			
 
				+        for step_id, data in enumerate(tqdm(dataloader)):
			
 
				             self.status['step_id'] = step_id
			
 
				-            if frame_id % 40 == 0:
			
 
				-                logger.info('Processing frame {} ({:.2f} fps)'.format(
			
 
				-                    frame_id, 1. / max(1e-5, timer.average_time)))
			
 
				             # forward
			
 
				             timer.tic()
			
 
				             pred_dets, pred_embs = self.model(data)
			
@@ -184,24 +197,23 @@ class Tracker(object):
 
				         if save_dir:
			
 
				             if not os.path.exists(save_dir): os.makedirs(save_dir)
			
 
				         use_detector = False if not self.model.detector else True
			
 
				+        use_reid = False if not self.model.reid else True
			
 
				 
			
 
				         timer = MOTTimer()
			
 
				         results = defaultdict(list)
			
 
				         frame_id = 0
			
 
				         self.status['mode'] = 'track'
			
 
				         self.model.eval()
			
 
				-        self.model.reid.eval()
			
 
				+        if use_reid:
			
 
				+            self.model.reid.eval()
			
 
				         if not use_detector:
			
 
				             dets_list = load_det_results(det_file, len(dataloader))
			
 
				             logger.info('Finish loading detection results file {}.'.format(
			
 
				                 det_file))
			
 
				 
			
 
				-        for step_id, data in enumerate(dataloader):
			
 
				+        tracker = self.model.tracker
			
 
				+        for step_id, data in enumerate(tqdm(dataloader)):
			
 
				             self.status['step_id'] = step_id
			
 
				-            if frame_id % 40 == 0:
			
 
				-                logger.info('Processing frame {} ({:.2f} fps)'.format(
			
 
				-                    frame_id, 1. / max(1e-5, timer.average_time)))
			
 
				-
			
 
				             ori_image = data['ori_image']  # [bs, H, W, 3]
			
 
				             ori_image_shape = data['ori_image'].shape[1:3]
			
 
				             # ori_image_shape: [H, W]
			
@@ -214,7 +226,7 @@ class Tracker(object):
 
				             scale_factor = data['scale_factor'][0].numpy()
			
 
				 
			
 
				             empty_detections = False
			
 
				-            # when it has no detected bboxes, will not inference reid model
			
 
				+            # when it has no detected bboxes, will not inference reid model 
			
 
				             # and if visualize, use original image instead
			
 
				 
			
 
				             # forward
			
@@ -240,7 +252,7 @@ class Tracker(object):
 
				                 outs['bbox'] = outs['bbox'].numpy()
			
 
				                 outs['bbox_num'] = outs['bbox_num'].numpy()
			
 
				 
			
 
				-                if outs['bbox_num'] > 0 and empty_detections == False:
			
 
				+                if len(outs['bbox']) > 0 and empty_detections == False:
			
 
				                     # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
			
 
				                     pred_cls_ids = outs['bbox'][:, 0:1]
			
 
				                     pred_scores = outs['bbox'][:, 1:2]
			
@@ -249,13 +261,15 @@ class Tracker(object):
 
				                         # with LetterBoxResize and JDEBBoxPostProcess.
			
 
				                         #
			
 
				                         # 'scaled' means whether the coords after detector outputs
			
 
				-                        # have been scaled back to the original image, set True
			
 
				+                        # have been scaled back to the original image, set True 
			
 
				                         # in general detector, set False in JDE YOLOv3.
			
 
				                         pred_bboxes = scale_coords(outs['bbox'][:, 2:],
			
 
				                                                    input_shape, im_shape,
			
 
				                                                    scale_factor)
			
 
				                     else:
			
 
				                         pred_bboxes = outs['bbox'][:, 2:]
			
 
				+                    pred_dets_old = np.concatenate(
			
 
				+                        (pred_cls_ids, pred_scores, pred_bboxes), axis=1)
			
 
				                 else:
			
 
				                     logger.warning(
			
 
				                         'Frame {} has not detected object, try to modify score threshold.'.
			
@@ -281,52 +295,104 @@ class Tracker(object):
 
				                 # thus will not inference reid model
			
 
				                 continue
			
 
				 
			
 
				-            pred_scores = pred_scores[keep_idx[0]]
			
 
				             pred_cls_ids = pred_cls_ids[keep_idx[0]]
			
 
				-            pred_tlwhs = np.concatenate(
			
 
				-                (pred_xyxys[:, 0:2],
			
 
				-                 pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
			
 
				-                axis=1)
			
 
				+            pred_scores = pred_scores[keep_idx[0]]
			
 
				             pred_dets = np.concatenate(
			
 
				-                (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
			
 
				-
			
 
				-            tracker = self.model.tracker
			
 
				-            crops = get_crops(
			
 
				-                pred_xyxys,
			
 
				-                ori_image,
			
 
				-                w=tracker.input_size[0],
			
 
				-                h=tracker.input_size[1])
			
 
				-            crops = paddle.to_tensor(crops)
			
 
				-
			
 
				-            data.update({'crops': crops})
			
 
				-            pred_embs = self.model(data).numpy()
			
 
				-
			
 
				-            tracker.predict()
			
 
				-            online_targets = tracker.update(pred_dets, pred_embs)
			
 
				-
			
 
				-            online_tlwhs, online_scores, online_ids = [], [], []
			
 
				-            for t in online_targets:
			
 
				-                if not t.is_confirmed() or t.time_since_update > 1:
			
 
				-                    continue
			
 
				-                tlwh = t.to_tlwh()
			
 
				-                tscore = t.score
			
 
				-                tid = t.track_id
			
 
				-                if tscore < draw_threshold: continue
			
 
				-                if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
			
 
				-                if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
			
 
				-                        3] > tracker.vertical_ratio:
			
 
				-                    continue
			
 
				-                online_tlwhs.append(tlwh)
			
 
				-                online_scores.append(tscore)
			
 
				-                online_ids.append(tid)
			
 
				-            timer.toc()
			
 
				+                (pred_cls_ids, pred_scores, pred_xyxys), axis=1)
			
 
				+
			
 
				+            if use_reid:
			
 
				+                crops = get_crops(
			
 
				+                    pred_xyxys,
			
 
				+                    ori_image,
			
 
				+                    w=tracker.input_size[0],
			
 
				+                    h=tracker.input_size[1])
			
 
				+                crops = paddle.to_tensor(crops)
			
 
				+
			
 
				+                data.update({'crops': crops})
			
 
				+                pred_embs = self.model(data)['embeddings'].numpy()
			
 
				+            else:
			
 
				+                pred_embs = None
			
 
				 
			
 
				-            # save results
			
 
				-            results[0].append(
			
 
				-                (frame_id + 1, online_tlwhs, online_scores, online_ids))
			
 
				-            save_vis_results(data, frame_id, online_ids, online_tlwhs,
			
 
				-                             online_scores, timer.average_time, show_image,
			
 
				-                             save_dir, self.cfg.num_classes)
			
 
				+            if isinstance(tracker, DeepSORTTracker):
			
 
				+                online_tlwhs, online_scores, online_ids = [], [], []
			
 
				+                tracker.predict()
			
 
				+                online_targets = tracker.update(pred_dets, pred_embs)
			
 
				+                for t in online_targets:
			
 
				+                    if not t.is_confirmed() or t.time_since_update > 1:
			
 
				+                        continue
			
 
				+                    tlwh = t.to_tlwh()
			
 
				+                    tscore = t.score
			
 
				+                    tid = t.track_id
			
 
				+                    if tscore < draw_threshold: continue
			
 
				+                    if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
			
 
				+                    if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
			
 
				+                            3] > tracker.vertical_ratio:
			
 
				+                        continue
			
 
				+                    online_tlwhs.append(tlwh)
			
 
				+                    online_scores.append(tscore)
			
 
				+                    online_ids.append(tid)
			
 
				+                timer.toc()
			
 
				+
			
 
				+                # save results
			
 
				+                results[0].append(
			
 
				+                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
			
 
				+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
			
 
				+                                 online_scores, timer.average_time, show_image,
			
 
				+                                 save_dir, self.cfg.num_classes)
			
 
				+
			
 
				+            elif isinstance(tracker, JDETracker):
			
 
				+                # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
			
 
				+                tracker.track_buffer, tracker.conf_thres = get_trick_hyperparams(
			
 
				+                    seq_name, tracker.track_buffer, tracker.conf_thres)
			
 
				+
			
 
				+                online_targets_dict = tracker.update(pred_dets_old, pred_embs)
			
 
				+                online_tlwhs = defaultdict(list)
			
 
				+                online_scores = defaultdict(list)
			
 
				+                online_ids = defaultdict(list)
			
 
				+                for cls_id in range(self.cfg.num_classes):
			
 
				+                    online_targets = online_targets_dict[cls_id]
			
 
				+                    for t in online_targets:
			
 
				+                        tlwh = t.tlwh
			
 
				+                        tid = t.track_id
			
 
				+                        tscore = t.score
			
 
				+                        if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
			
 
				+                        if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
			
 
				+                                3] > tracker.vertical_ratio:
			
 
				+                            continue
			
 
				+                        online_tlwhs[cls_id].append(tlwh)
			
 
				+                        online_ids[cls_id].append(tid)
			
 
				+                        online_scores[cls_id].append(tscore)
			
 
				+                    # save results
			
 
				+                    results[cls_id].append(
			
 
				+                        (frame_id + 1, online_tlwhs[cls_id],
			
 
				+                         online_scores[cls_id], online_ids[cls_id]))
			
 
				+                timer.toc()
			
 
				+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
			
 
				+                                 online_scores, timer.average_time, show_image,
			
 
				+                                 save_dir, self.cfg.num_classes)
			
 
				+            elif isinstance(tracker, OCSORTTracker):
			
 
				+                # OC_SORT Tracker
			
 
				+                online_targets = tracker.update(pred_dets_old, pred_embs)
			
 
				+                online_tlwhs = []
			
 
				+                online_ids = []
			
 
				+                online_scores = []
			
 
				+                for t in online_targets:
			
 
				+                    tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]]
			
 
				+                    tscore = float(t[4])
			
 
				+                    tid = int(t[5])
			
 
				+                    if tlwh[2] * tlwh[3] > 0:
			
 
				+                        online_tlwhs.append(tlwh)
			
 
				+                        online_ids.append(tid)
			
 
				+                        online_scores.append(tscore)
			
 
				+                timer.toc()
			
 
				+                # save results
			
 
				+                results[0].append(
			
 
				+                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
			
 
				+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
			
 
				+                                 online_scores, timer.average_time, show_image,
			
 
				+                                 save_dir, self.cfg.num_classes)
			
 
				+            else:
			
 
				+                raise ValueError(tracker)
			
 
				             frame_id += 1
			
 
				 
			
 
				         return results, frame_id, timer.average_time, timer.calls
			
@@ -345,10 +411,10 @@ class Tracker(object):
 
				         if not os.path.exists(output_dir): os.makedirs(output_dir)
			
 
				         result_root = os.path.join(output_dir, 'mot_results')
			
 
				         if not os.path.exists(result_root): os.makedirs(result_root)
			
 
				-        assert data_type in ['mot', 'mcmot', 'kitti'], \
			
 
				+        assert data_type in MOT_DATA_TYPE, \
			
 
				             "data_type should be 'mot', 'mcmot' or 'kitti'"
			
 
				-        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
			
 
				-            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
			
 
				+        assert model_type in MOT_ARCH, \
			
 
				+            "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
			
 
				 
			
 
				         # run tracking
			
 
				         n_frame = 0
			
@@ -371,7 +437,7 @@ class Tracker(object):
 
				 
			
 
				             save_dir = os.path.join(output_dir, 'mot_outputs',
			
 
				                                     seq) if save_images or save_videos else None
			
 
				-            logger.info('start seq: {}'.format(seq))
			
 
				+            logger.info('Evaluate seq: {}'.format(seq))
			
 
				 
			
 
				             self.dataset.set_images(self.get_infer_images(infer_dir))
			
 
				             dataloader = create('EvalMOTReader')(self.dataset, 0)
			
@@ -379,13 +445,13 @@ class Tracker(object):
 
				             result_filename = os.path.join(result_root, '{}.txt'.format(seq))
			
 
				 
			
 
				             with paddle.no_grad():
			
 
				-                if model_type in ['JDE', 'FairMOT']:
			
 
				+                if model_type in MOT_ARCH_JDE:
			
 
				                     results, nf, ta, tc = self._eval_seq_jde(
			
 
				                         dataloader,
			
 
				                         save_dir=save_dir,
			
 
				                         show_image=show_image,
			
 
				                         frame_rate=frame_rate)
			
 
				-                elif model_type in ['DeepSORT']:
			
 
				+                elif model_type in MOT_ARCH_SDE:
			
 
				                     results, nf, ta, tc = self._eval_seq_sde(
			
 
				                         dataloader,
			
 
				                         save_dir=save_dir,
			
@@ -412,7 +478,6 @@ class Tracker(object):
 
				                 os.system(cmd_str)
			
 
				                 logger.info('Save video in {}.'.format(output_video_path))
			
 
				 
			
 
				-            logger.info('Evaluate seq: {}'.format(seq))
			
 
				             # update metrics
			
 
				             for metric in self._metrics:
			
 
				                 metric.update(data_root, seq, data_type, result_root,
			
@@ -471,12 +536,12 @@ class Tracker(object):
 
				         if not os.path.exists(output_dir): os.makedirs(output_dir)
			
 
				         result_root = os.path.join(output_dir, 'mot_results')
			
 
				         if not os.path.exists(result_root): os.makedirs(result_root)
			
 
				-        assert data_type in ['mot', 'mcmot', 'kitti'], \
			
 
				+        assert data_type in MOT_DATA_TYPE, \
			
 
				             "data_type should be 'mot', 'mcmot' or 'kitti'"
			
 
				-        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
			
 
				-            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"
			
 
				+        assert model_type in MOT_ARCH, \
			
 
				+            "model_type should be 'JDE', 'DeepSORT', 'FairMOT' or 'ByteTrack'"
			
 
				 
			
 
				-        # run tracking
			
 
				+        # run tracking        
			
 
				         if video_file:
			
 
				             seq = video_file.split('/')[-1].split('.')[0]
			
 
				             self.dataset.set_video(video_file, frame_rate)
			
@@ -504,14 +569,14 @@ class Tracker(object):
 
				             frame_rate = self.dataset.frame_rate
			
 
				 
			
 
				         with paddle.no_grad():
			
 
				-            if model_type in ['JDE', 'FairMOT']:
			
 
				+            if model_type in MOT_ARCH_JDE:
			
 
				                 results, nf, ta, tc = self._eval_seq_jde(
			
 
				                     dataloader,
			
 
				                     save_dir=save_dir,
			
 
				                     show_image=show_image,
			
 
				                     frame_rate=frame_rate,
			
 
				                     draw_threshold=draw_threshold)
			
 
				-            elif model_type in ['DeepSORT']:
			
 
				+            elif model_type in MOT_ARCH_SDE:
			
 
				                 results, nf, ta, tc = self._eval_seq_sde(
			
 
				                     dataloader,
			
 
				                     save_dir=save_dir,
			
@@ -535,3 +600,35 @@ class Tracker(object):
 
				 
			
 
				         write_mot_results(result_filename, results, data_type,
			
 
				                           self.cfg.num_classes)
			
 
				+
			
 
				+
			
 
				+def get_trick_hyperparams(video_name, ori_buffer, ori_thresh):
				+    """Return per-sequence tracker hyperparameters for MOTChallenge test sets.
				+
				+    Only sequences whose name starts with 'MOT' are affected; for any other
				+    name the original values are returned untouched. Matching is done on the
				+    first 8 characters of the name (e.g. 'MOT17-05'), so detector suffixes
				+    such as '-FRCNN' are ignored.
				+
				+    Args:
				+        video_name (str): sequence name, e.g. 'MOT17-05-FRCNN' or 'MOT20-06'.
				+        ori_buffer: track buffer to fall back to when the sequence has no
				+            dedicated override.
				+        ori_thresh: confidence threshold to fall back to when the sequence
				+            has no dedicated override.
				+
				+    Returns:
				+        tuple: (track_buffer, track_thresh) to use for this sequence.
				+    """
				+    if video_name[:3] != 'MOT':
				+        # only used for MOTChallenge (MOT17, MOT20) Test-set
				+        return ori_buffer, ori_thresh
				+
				+    video_name = video_name[:8]
				+    if 'MOT17-05' in video_name:
				+        track_buffer = 14
				+    elif 'MOT17-13' in video_name:
				+        track_buffer = 25
				+    else:
				+        track_buffer = ori_buffer
				+
				+    # A single chain is used for the threshold: a separate MOT20 if/else
				+    # would overwrite the MOT17 value chosen here with ori_thresh.
				+    if 'MOT17-01' in video_name:
				+        track_thresh = 0.65
				+    elif 'MOT17-06' in video_name:
				+        track_thresh = 0.65
				+    elif 'MOT17-12' in video_name:
				+        track_thresh = 0.7
				+    elif 'MOT17-14' in video_name:
				+        track_thresh = 0.67
				+    elif 'MOT20-06' in video_name or 'MOT20-08' in video_name:
				+        track_thresh = 0.3
				+    else:
				+        track_thresh = ori_thresh
				+
				+    # Return the selected threshold, not ori_thresh, so the per-sequence
				+    # overrides above actually reach the caller.
				+    return track_buffer, track_thresh
			
--- a/paddlers/models/ppdet/engine/trainer.py
+++ b/paddlers/models/ppdet/engine/trainer.py
@@ -20,38 +20,44 @@ import os
 
				 import sys
			
 
				 import copy
			
 
				 import time
			
 
				+from tqdm import tqdm
			
 
				 
			
 
				 import numpy as np
			
 
				 import typing
			
 
				-from PIL import Image, ImageOps
			
 
				+from PIL import Image, ImageOps, ImageFile
			
 
				+
			
 
				+ImageFile.LOAD_TRUNCATED_IMAGES = True
			
 
				 
			
 
				 import paddle
			
 
				+import paddle.nn as nn
			
 
				 import paddle.distributed as dist
			
 
				 from paddle.distributed import fleet
			
 
				-from paddle import amp
			
 
				 from paddle.static import InputSpec
			
 
				 from paddlers.models.ppdet.optimizer import ModelEMA
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import create
			
 
				-from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch
			
 
				 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
			
 
				 from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
			
 
				 from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
			
 
				 from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
			
 
				 from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
			
 
				 from paddlers.models.ppdet.data.source.category import get_categories
			
 
				-from paddlers.models.ppdet.utils import stats
			
 
				+import paddlers.models.ppdet.utils.stats as stats
			
 
				+from paddlers.models.ppdet.utils.fuse_utils import fuse_conv_bn
			
 
				 from paddlers.models.ppdet.utils import profiler
			
 
				+from paddlers.models.ppdet.modeling.post_process import multiclass_nms
			
 
				 
			
 
				-from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator
			
 
				+from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator, WandbCallback
			
 
				 from .export_utils import _dump_infer_config, _prune_input_spec
			
 
				 
			
 
				+from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
			
 
				+
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger('ppdet.engine')
			
 
				 
			
 
				 __all__ = ['Trainer']
			
 
				 
			
 
				-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
			
 
				+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
			
 
				 
			
 
				 
			
 
				 class Trainer(object):
			
@@ -62,19 +68,30 @@ class Trainer(object):
 
				         self.mode = mode.lower()
			
 
				         self.optimizer = None
			
 
				         self.is_loaded_weights = False
			
 
				+        self.use_amp = self.cfg.get('amp', False)
			
 
				+        self.amp_level = self.cfg.get('amp_level', 'O1')
			
 
				+        self.custom_white_list = self.cfg.get('custom_white_list', None)
			
 
				+        self.custom_black_list = self.cfg.get('custom_black_list', None)
			
 
				 
			
 
				         # build data loader
			
 
				+        capital_mode = self.mode.capitalize()
			
 
				         if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
			
 
				-            self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
			
 
				+            self.dataset = self.cfg['{}MOTDataset'.format(
			
 
				+                capital_mode)] = create('{}MOTDataset'.format(capital_mode))()
			
 
				         else:
			
 
				-            self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
			
 
				+            self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create(
			
 
				+                '{}Dataset'.format(capital_mode))()
			
 
				 
			
 
				         if cfg.architecture == 'DeepSORT' and self.mode == 'train':
			
 
				             logger.error('DeepSORT has no need of training on mot dataset.')
			
 
				             sys.exit(1)
			
 
				 
			
 
				+        if cfg.architecture == 'FairMOT' and self.mode == 'eval':
			
 
				+            images = self.parse_mot_images(cfg)
			
 
				+            self.dataset.set_images(images)
			
 
				+
			
 
				         if self.mode == 'train':
			
 
				-            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
			
 
				+            self.loader = create('{}Reader'.format(capital_mode))(
			
 
				                 self.dataset, cfg.worker_num)
			
 
				 
			
 
				         if cfg.architecture == 'JDE' and self.mode == 'train':
			
@@ -94,41 +111,73 @@ class Trainer(object):
 
				             self.model = self.cfg.model
			
 
				             self.is_loaded_weights = True
			
 
				 
			
 
				-        #normalize params for deploy
			
 
				-        self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
			
 
				+        if cfg.architecture == 'YOLOX':
			
 
				+            for k, m in self.model.named_sublayers():
			
 
				+                if isinstance(m, nn.BatchNorm2D):
			
 
				+                    m._epsilon = 1e-3  # for amp(fp16)
			
 
				+                    m._momentum = 0.97  # 0.03 in pytorch
			
 
				 
			
 
				-        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
			
 
				-        if self.use_ema:
			
 
				-            ema_decay = self.cfg.get('ema_decay', 0.9998)
			
 
				-            cycle_epoch = self.cfg.get('cycle_epoch', -1)
			
 
				-            self.ema = ModelEMA(
			
 
				-                self.model,
			
 
				-                decay=ema_decay,
			
 
				-                use_thres_step=True,
			
 
				-                cycle_epoch=cycle_epoch)
			
 
				+        #normalize params for deploy
			
 
				+        if 'slim' in cfg and cfg['slim_type'] == 'OFA':
			
 
				+            self.model.model.load_meanstd(cfg['TestReader'][
			
 
				+                'sample_transforms'])
			
 
				+        elif 'slim' in cfg and cfg['slim_type'] == 'Distill':
			
 
				+            self.model.student_model.load_meanstd(cfg['TestReader'][
			
 
				+                'sample_transforms'])
			
 
				+        elif 'slim' in cfg and cfg[
			
 
				+                'slim_type'] == 'DistillPrune' and self.mode == 'train':
			
 
				+            self.model.student_model.load_meanstd(cfg['TestReader'][
			
 
				+                'sample_transforms'])
			
 
				+        else:
			
 
				+            self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
			
 
				 
			
 
				         # EvalDataset build with BatchSampler to evaluate in single device
			
 
				         # TODO: multi-device evaluate
			
 
				         if self.mode == 'eval':
			
 
				-            self._eval_batch_sampler = paddle.io.BatchSampler(
			
 
				-                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
			
 
				-            reader_name = '{}Reader'.format(self.mode.capitalize())
			
 
				-            # If metric is VOC, need to be set collate_batch=False.
			
 
				-            if cfg.metric == 'VOC':
			
 
				-                cfg[reader_name]['collate_batch'] = False
			
 
				-            self.loader = create(reader_name)(self.dataset, cfg.worker_num,
			
 
				-                                              self._eval_batch_sampler)
			
 
				+            if cfg.architecture == 'FairMOT':
			
 
				+                self.loader = create('EvalMOTReader')(self.dataset, 0)
			
 
				+            else:
			
 
				+                self._eval_batch_sampler = paddle.io.BatchSampler(
			
 
				+                    self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
			
 
				+                reader_name = '{}Reader'.format(self.mode.capitalize())
			
 
				+                # If metric is VOC, need to be set collate_batch=False.
			
 
				+                if cfg.metric == 'VOC':
			
 
				+                    cfg[reader_name]['collate_batch'] = False
			
 
				+                self.loader = create(reader_name)(self.dataset, cfg.worker_num,
			
 
				+                                                  self._eval_batch_sampler)
			
 
				         # TestDataset build after user set images, skip loader creation here
			
 
				 
			
 
				         # build optimizer in train mode
			
 
				         if self.mode == 'train':
			
 
				             steps_per_epoch = len(self.loader)
			
 
				+            if steps_per_epoch < 1:
			
 
				+                logger.warning(
			
 
				+                    "Samples in dataset are less than batch_size, please set smaller batch_size in TrainReader."
			
 
				+                )
			
 
				             self.lr = create('LearningRate')(steps_per_epoch)
			
 
				             self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
			
 
				 
			
 
				-        if self.cfg.get('unstructured_prune'):
			
 
				-            self.pruner = create('UnstructuredPruner')(self.model,
			
 
				-                                                       steps_per_epoch)
			
 
				+            # Unstructured pruner is only enabled in the train mode.
			
 
				+            if self.cfg.get('unstructured_prune'):
			
 
				+                self.pruner = create('UnstructuredPruner')(self.model,
			
 
				+                                                           steps_per_epoch)
			
 
				+        if self.use_amp and self.amp_level == 'O2':
			
 
				+            self.model, self.optimizer = paddle.amp.decorate(
			
 
				+                models=self.model,
			
 
				+                optimizers=self.optimizer,
			
 
				+                level=self.amp_level)
			
 
				+        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
			
 
				+        if self.use_ema:
			
 
				+            ema_decay = self.cfg.get('ema_decay', 0.9998)
			
 
				+            ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
			
 
				+            cycle_epoch = self.cfg.get('cycle_epoch', -1)
			
 
				+            ema_black_list = self.cfg.get('ema_black_list', None)
			
 
				+            self.ema = ModelEMA(
			
 
				+                self.model,
			
 
				+                decay=ema_decay,
			
 
				+                ema_decay_type=ema_decay_type,
			
 
				+                cycle_epoch=cycle_epoch,
			
 
				+                ema_black_list=ema_black_list)
			
 
				 
			
 
				         self._nranks = dist.get_world_size()
			
 
				         self._local_rank = dist.get_rank()
			
@@ -152,6 +201,8 @@ class Trainer(object):
 
				                 self._callbacks.append(VisualDLWriter(self))
			
 
				             if self.cfg.get('save_proposals', False):
			
 
				                 self._callbacks.append(SniperProposalsGenerator(self))
			
 
				+            if self.cfg.get('use_wandb', False) or 'wandb' in self.cfg:
			
 
				+                self._callbacks.append(WandbCallback(self))
			
 
				             self._compose_callback = ComposeCallback(self._callbacks)
			
 
				         elif self.mode == 'eval':
			
 
				             self._callbacks = [LogPrinter(self)]
			
@@ -172,7 +223,7 @@ class Trainer(object):
 
				         classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
			
 
				         if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
			
 
				             # TODO: bias should be unified
			
 
				-            bias = self.cfg['bias'] if 'bias' in self.cfg else 0
			
 
				+            bias = 1 if self.cfg.get('bias', False) else 0
			
 
				             output_eval = self.cfg['output_eval'] \
			
 
				                 if 'output_eval' in self.cfg else None
			
 
				             save_prediction_only = self.cfg.get('save_prediction_only', False)
			
@@ -184,13 +235,14 @@ class Trainer(object):
 
				 
			
 
				             # when do validation in train, annotation file should be get from
			
 
				             # EvalReader instead of self.dataset(which is TrainReader)
			
 
				-            anno_file = self.dataset.get_anno()
			
 
				-            dataset = self.dataset
			
 
				             if self.mode == 'train' and validate:
			
 
				                 eval_dataset = self.cfg['EvalDataset']
			
 
				                 eval_dataset.check_or_download_dataset()
			
 
				                 anno_file = eval_dataset.get_anno()
			
 
				                 dataset = eval_dataset
			
 
				+            else:
			
 
				+                dataset = self.dataset
			
 
				+                anno_file = dataset.get_anno()
			
 
				 
			
 
				             IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
			
 
				             if self.cfg.metric == "COCO":
			
@@ -222,11 +274,7 @@ class Trainer(object):
 
				             output_eval = self.cfg['output_eval'] \
			
 
				                 if 'output_eval' in self.cfg else None
			
 
				             save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				-
			
 
				-            # pass clsid2catid info to metric instance to avoid multiple loading
			
 
				-            # annotation file
			
 
				-            clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
			
 
				-                                if self.mode == 'eval' else None
			
 
				+            imid2path = self.cfg.get('imid2path', None)
			
 
				 
			
 
				             # when do validation in train, annotation file should be get from
			
 
				             # EvalReader instead of self.dataset(which is TrainReader)
			
@@ -239,19 +287,25 @@ class Trainer(object):
 
				             self._metrics = [
			
 
				                 RBoxMetric(
			
 
				                     anno_file=anno_file,
			
 
				-                    clsid2catid=clsid2catid,
			
 
				                     classwise=classwise,
			
 
				                     output_eval=output_eval,
			
 
				                     bias=bias,
			
 
				-                    save_prediction_only=save_prediction_only)
			
 
				+                    save_prediction_only=save_prediction_only,
			
 
				+                    imid2path=imid2path)
			
 
				             ]
			
 
				         elif self.cfg.metric == 'VOC':
			
 
				+            output_eval = self.cfg['output_eval'] \
			
 
				+                if 'output_eval' in self.cfg else None
			
 
				+            save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				+
			
 
				             self._metrics = [
			
 
				                 VOCMetric(
			
 
				                     label_list=self.dataset.get_label_list(),
			
 
				                     class_num=self.cfg.num_classes,
			
 
				                     map_type=self.cfg.map_type,
			
 
				-                    classwise=classwise)
			
 
				+                    classwise=classwise,
			
 
				+                    output_eval=output_eval,
			
 
				+                    save_prediction_only=save_prediction_only)
			
 
				             ]
			
 
				         elif self.cfg.metric == 'WiderFace':
			
 
				             multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
			
@@ -334,19 +388,29 @@ class Trainer(object):
 
				             self.start_epoch = load_weight(self.model.student_model, weights,
			
 
				                                            self.optimizer)
			
 
				         else:
			
 
				-            self.start_epoch = load_weight(self.model, weights, self.optimizer)
			
 
				+            self.start_epoch = load_weight(self.model, weights, self.optimizer,
			
 
				+                                           self.ema if self.use_ema else None)
			
 
				         logger.debug("Resume weights of epoch {}".format(self.start_epoch))
			
 
				 
			
 
				     def train(self, validate=False):
			
 
				         assert self.mode == 'train', "Model not in 'train' mode"
			
 
				         Init_mark = False
			
 
				+        if validate:
			
 
				+            self.cfg['EvalDataset'] = self.cfg.EvalDataset = create(
			
 
				+                "EvalDataset")()
			
 
				 
			
 
				-        sync_bn = (getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
			
 
				+        model = self.model
			
 
				+        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
			
 
				                    self.cfg.use_gpu and self._nranks > 1)
			
 
				         if sync_bn:
			
 
				-            self.model = BaseArch.convert_sync_batchnorm(self.model)
			
 
				-
			
 
				-        model = self.model
			
 
				+            model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
			
 
				+
			
 
				+        # enabel auto mixed precision mode
			
 
				+        if self.use_amp:
			
 
				+            scaler = paddle.amp.GradScaler(
			
 
				+                enable=self.cfg.use_gpu or self.cfg.use_npu,
			
 
				+                init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
			
 
				+        # get distributed model
			
 
				         if self.cfg.get('fleet', False):
			
 
				             model = fleet.distributed_model(model)
			
 
				             self.optimizer = fleet.distributed_optimizer(self.optimizer)
			
@@ -354,12 +418,7 @@ class Trainer(object):
 
				             find_unused_parameters = self.cfg[
			
 
				                 'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
			
 
				             model = paddle.DataParallel(
			
 
				-                self.model, find_unused_parameters=find_unused_parameters)
			
 
				-
			
 
				-        # initial fp16
			
 
				-        if self.cfg.get('fp16', False):
			
 
				-            scaler = amp.GradScaler(
			
 
				-                enable=self.cfg.use_gpu, init_loss_scaling=1024)
			
 
				+                model, find_unused_parameters=find_unused_parameters)
			
 
				 
			
 
				         self.status.update({
			
 
				             'epoch_id': self.start_epoch,
			
@@ -381,6 +440,9 @@ class Trainer(object):
 
				 
			
 
				         self._compose_callback.on_train_begin(self.status)
			
 
				 
			
 
				+        use_fused_allreduce_gradients = self.cfg[
			
 
				+            'use_fused_allreduce_gradients'] if 'use_fused_allreduce_gradients' in self.cfg else False
			
 
				+
			
 
				         for epoch_id in range(self.start_epoch, self.cfg.epoch):
			
 
				             self.status['mode'] = 'train'
			
 
				             self.status['epoch_id'] = epoch_id
			
@@ -395,23 +457,56 @@ class Trainer(object):
 
				                 self._compose_callback.on_step_begin(self.status)
			
 
				                 data['epoch_id'] = epoch_id
			
 
				 
			
 
				-                if self.cfg.get('fp16', False):
			
 
				-                    with amp.auto_cast(enable=self.cfg.use_gpu):
			
 
				-                        # model forward
			
 
				-                        outputs = model(data)
			
 
				-                        loss = outputs['loss']
			
 
				-
			
 
				-                    # model backward
			
 
				-                    scaled_loss = scaler.scale(loss)
			
 
				-                    scaled_loss.backward()
			
 
				+                if self.use_amp:
			
 
				+                    if isinstance(
			
 
				+                            model, paddle.
			
 
				+                            DataParallel) and use_fused_allreduce_gradients:
			
 
				+                        with model.no_sync():
			
 
				+                            with paddle.amp.auto_cast(
			
 
				+                                    enable=self.cfg.use_gpu,
			
 
				+                                    custom_white_list=self.custom_white_list,
			
 
				+                                    custom_black_list=self.custom_black_list,
			
 
				+                                    level=self.amp_level):
			
 
				+                                # model forward
			
 
				+                                outputs = model(data)
			
 
				+                                loss = outputs['loss']
			
 
				+                            # model backward
			
 
				+                            scaled_loss = scaler.scale(loss)
			
 
				+                            scaled_loss.backward()
			
 
				+                        fused_allreduce_gradients(
			
 
				+                            list(model.parameters()), None)
			
 
				+                    else:
			
 
				+                        with paddle.amp.auto_cast(
			
 
				+                                enable=self.cfg.use_gpu,
			
 
				+                                custom_white_list=self.custom_white_list,
			
 
				+                                custom_black_list=self.custom_black_list,
			
 
				+                                level=self.amp_level):
			
 
				+                            # model forward
			
 
				+                            outputs = model(data)
			
 
				+                            loss = outputs['loss']
			
 
				+                        # model backward
			
 
				+                        scaled_loss = scaler.scale(loss)
			
 
				+                        scaled_loss.backward()
			
 
				                     # in dygraph mode, optimizer.minimize is equal to optimizer.step
			
 
				                     scaler.minimize(self.optimizer, scaled_loss)
			
 
				                 else:
			
 
				-                    # model forward
			
 
				-                    outputs = model(data)
			
 
				-                    loss = outputs['loss']
			
 
				-                    # model backward
			
 
				-                    loss.backward()
			
 
				+                    if isinstance(
			
 
				+                            model, paddle.
			
 
				+                            DataParallel) and use_fused_allreduce_gradients:
			
 
				+                        with model.no_sync():
			
 
				+                            # model forward
			
 
				+                            outputs = model(data)
			
 
				+                            loss = outputs['loss']
			
 
				+                            # model backward
			
 
				+                            loss.backward()
			
 
				+                        fused_allreduce_gradients(
			
 
				+                            list(model.parameters()), None)
			
 
				+                    else:
			
 
				+                        # model forward
			
 
				+                        outputs = model(data)
			
 
				+                        loss = outputs['loss']
			
 
				+                        # model backward
			
 
				+                        loss.backward()
			
 
				                     self.optimizer.step()
			
 
				                 curr_lr = self.optimizer.get_lr()
			
 
				                 self.lr.step()
			
@@ -426,21 +521,23 @@ class Trainer(object):
 
				                 self.status['batch_time'].update(time.time() - iter_tic)
			
 
				                 self._compose_callback.on_step_end(self.status)
			
 
				                 if self.use_ema:
			
 
				-                    self.ema.update(self.model)
			
 
				+                    self.ema.update()
			
 
				                 iter_tic = time.time()
			
 
				 
			
 
				-            # apply ema weight on model
			
 
				-            if self.use_ema:
			
 
				-                weight = copy.deepcopy(self.model.state_dict())
			
 
				-                self.model.set_dict(self.ema.apply())
			
 
				             if self.cfg.get('unstructured_prune'):
			
 
				                 self.pruner.update_params()
			
 
				 
			
 
				+            is_snapshot = (self._nranks < 2 or self._local_rank == 0) \
			
 
				+                       and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
			
 
				+            if is_snapshot and self.use_ema:
			
 
				+                # apply ema weight on model
			
 
				+                weight = copy.deepcopy(self.model.state_dict())
			
 
				+                self.model.set_dict(self.ema.apply())
			
 
				+                self.status['weight'] = weight
			
 
				+
			
 
				             self._compose_callback.on_epoch_end(self.status)
			
 
				 
			
 
				-            if validate and (self._nranks < 2 or self._local_rank == 0) \
			
 
				-                    and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
			
 
				-                             or epoch_id == self.end_epoch - 1):
			
 
				+            if validate and is_snapshot:
			
 
				                 if not hasattr(self, '_eval_loader'):
			
 
				                     # build evaluation dataset and loader
			
 
				                     self._eval_dataset = self.cfg.EvalDataset
			
@@ -461,13 +558,15 @@ class Trainer(object):
 
				                     Init_mark = True
			
 
				                     self._init_metrics(validate=validate)
			
 
				                     self._reset_metrics()
			
 
				+
			
 
				                 with paddle.no_grad():
			
 
				                     self.status['save_best_model'] = True
			
 
				                     self._eval_with_loader(self._eval_loader)
			
 
				 
			
 
				-            # restore origin weight on model
			
 
				-            if self.use_ema:
			
 
				+            if is_snapshot and self.use_ema:
			
 
				+                # reset original weight
			
 
				                 self.model.set_dict(weight)
			
 
				+                self.status.pop('weight')
			
 
				 
			
 
				         self._compose_callback.on_train_end(self.status)
			
 
				 
			
@@ -485,7 +584,15 @@ class Trainer(object):
 
				             self.status['step_id'] = step_id
			
 
				             self._compose_callback.on_step_begin(self.status)
			
 
				             # forward
			
 
				-            outs = self.model(data)
			
 
				+            if self.use_amp:
			
 
				+                with paddle.amp.auto_cast(
			
 
				+                        enable=self.cfg.use_gpu,
			
 
				+                        custom_white_list=self.custom_white_list,
			
 
				+                        custom_black_list=self.custom_black_list,
			
 
				+                        level=self.amp_level):
			
 
				+                    outs = self.model(data)
			
 
				+            else:
			
 
				+                outs = self.model(data)
			
 
				 
			
 
				             # update metrics
			
 
				             for metric in self._metrics:
			
@@ -513,32 +620,267 @@ class Trainer(object):
 
				         with paddle.no_grad():
			
 
				             self._eval_with_loader(self.loader)
			
 
				 
			
 
				+    def _eval_with_loader_slice(self,
			
 
				+                                loader,
			
 
				+                                slice_size=[640, 640],
			
 
				+                                overlap_ratio=[0.25, 0.25],
			
 
				+                                combine_method='nms',
			
 
				+                                match_threshold=0.6,
			
 
				+                                match_metric='iou'):
			
 
				+        sample_num = 0
			
 
				+        tic = time.time()
			
 
				+        self._compose_callback.on_epoch_begin(self.status)
			
 
				+        self.status['mode'] = 'eval'
			
 
				+        self.model.eval()
			
 
				+        if self.cfg.get('print_flops', False):
			
 
				+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
			
 
				+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
			
 
				+            self._flops(flops_loader)
			
 
				+
			
 
				+        merged_bboxs = []
			
 
				+        for step_id, data in enumerate(loader):
			
 
				+            self.status['step_id'] = step_id
			
 
				+            self._compose_callback.on_step_begin(self.status)
			
 
				+            # forward
			
 
				+            if self.use_amp:
			
 
				+                with paddle.amp.auto_cast(
			
 
				+                        enable=self.cfg.use_gpu,
			
 
				+                        custom_white_list=self.custom_white_list,
			
 
				+                        custom_black_list=self.custom_black_list,
			
 
				+                        level=self.amp_level):
			
 
				+                    outs = self.model(data)
			
 
				+            else:
			
 
				+                outs = self.model(data)
			
 
				+
			
 
				+            shift_amount = data['st_pix']
			
 
				+            outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount
			
 
				+            outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount
			
 
				+            merged_bboxs.append(outs['bbox'])
			
 
				+
			
 
				+            if data['is_last'] > 0:
			
 
				+                # merge matching predictions
			
 
				+                merged_results = {'bbox': []}
			
 
				+                if combine_method == 'nms':
			
 
				+                    final_boxes = multiclass_nms(
			
 
				+                        np.concatenate(merged_bboxs), self.cfg.num_classes,
			
 
				+                        match_threshold, match_metric)
			
 
				+                    merged_results['bbox'] = np.concatenate(final_boxes)
			
 
				+                elif combine_method == 'concat':
			
 
				+                    merged_results['bbox'] = np.concatenate(merged_bboxs)
			
 
				+                else:
			
 
				+                    raise ValueError(
			
 
				+                        "Now only support 'nms' or 'concat' to fuse detection results."
			
 
				+                    )
			
 
				+                merged_results['im_id'] = np.array([[0]])
			
 
				+                merged_results['bbox_num'] = np.array(
			
 
				+                    [len(merged_results['bbox'])])
			
 
				+
			
 
				+                merged_bboxs = []
			
 
				+                data['im_id'] = data['ori_im_id']
			
 
				+                # update metrics
			
 
				+                for metric in self._metrics:
			
 
				+                    metric.update(data, merged_results)
			
 
				+
			
 
				+                # multi-scale inputs: all inputs have same im_id
			
 
				+                if isinstance(data, typing.Sequence):
			
 
				+                    sample_num += data[0]['im_id'].numpy().shape[0]
			
 
				+                else:
			
 
				+                    sample_num += data['im_id'].numpy().shape[0]
			
 
				+
			
 
				+            self._compose_callback.on_step_end(self.status)
			
 
				+
			
 
				+        self.status['sample_num'] = sample_num
			
 
				+        self.status['cost_time'] = time.time() - tic
			
 
				+
			
 
				+        # accumulate metric to log out
			
 
				+        for metric in self._metrics:
			
 
				+            metric.accumulate()
			
 
				+            metric.log()
			
 
				+        self._compose_callback.on_epoch_end(self.status)
			
 
				+        # reset metric states for metric may performed multiple times
			
 
				+        self._reset_metrics()
			
 
				+
			
 
				+    def evaluate_slice(self,
			
 
				+                       slice_size=[640, 640],
			
 
				+                       overlap_ratio=[0.25, 0.25],
			
 
				+                       combine_method='nms',
			
 
				+                       match_threshold=0.6,
			
 
				+                       match_metric='iou'):
			
 
				+        with paddle.no_grad():
			
 
				+            self._eval_with_loader_slice(self.loader, slice_size, overlap_ratio,
			
 
				+                                         combine_method, match_threshold,
			
 
				+                                         match_metric)
			
 
				+
			
 
				+    def slice_predict(self,
			
 
				+                      images,
			
 
				+                      slice_size=[640, 640],
			
 
				+                      overlap_ratio=[0.25, 0.25],
			
 
				+                      combine_method='nms',
			
 
				+                      match_threshold=0.6,
			
 
				+                      match_metric='iou',
			
 
				+                      draw_threshold=0.5,
			
 
				+                      output_dir='output',
			
 
				+                      save_results=False,
			
 
				+                      visualize=True):
			
 
				+        self.dataset.set_slice_images(images, slice_size, overlap_ratio)
			
 
				+        loader = create('TestReader')(self.dataset, 0)
			
 
				+
			
 
				+        imid2path = self.dataset.get_imid2path()
			
 
				+
			
 
				+        anno_file = self.dataset.get_anno()
			
 
				+        clsid2catid, catid2name = get_categories(
			
 
				+            self.cfg.metric, anno_file=anno_file)
			
 
				+
			
 
				+        # Run Infer 
			
 
				+        self.status['mode'] = 'test'
			
 
				+        self.model.eval()
			
 
				+        if self.cfg.get('print_flops', False):
			
 
				+            flops_loader = create('TestReader')(self.dataset, 0)
			
 
				+            self._flops(flops_loader)
			
 
				+
			
 
				+        results = []  # all images
			
 
				+        merged_bboxs = []  # single image
			
 
				+        for step_id, data in enumerate(tqdm(loader)):
			
 
				+            self.status['step_id'] = step_id
			
 
				+            # forward
			
 
				+            outs = self.model(data)
			
 
				+
			
 
				+            outs['bbox'] = outs['bbox'].numpy()  # only in test mode
			
 
				+            shift_amount = data['st_pix']
			
 
				+            outs['bbox'][:, 2:4] = outs['bbox'][:, 2:4] + shift_amount.numpy()
			
 
				+            outs['bbox'][:, 4:6] = outs['bbox'][:, 4:6] + shift_amount.numpy()
			
 
				+            merged_bboxs.append(outs['bbox'])
			
 
				+
			
 
				+            if data['is_last'] > 0:
			
 
				+                # merge matching predictions
			
 
				+                merged_results = {'bbox': []}
			
 
				+                if combine_method == 'nms':
			
 
				+                    final_boxes = multiclass_nms(
			
 
				+                        np.concatenate(merged_bboxs), self.cfg.num_classes,
			
 
				+                        match_threshold, match_metric)
			
 
				+                    merged_results['bbox'] = np.concatenate(final_boxes)
			
 
				+                elif combine_method == 'concat':
			
 
				+                    merged_results['bbox'] = np.concatenate(merged_bboxs)
			
 
				+                else:
			
 
				+                    raise ValueError(
			
 
				+                        "Now only support 'nms' or 'concat' to fuse detection results."
			
 
				+                    )
			
 
				+                merged_results['im_id'] = np.array([[0]])
			
 
				+                merged_results['bbox_num'] = np.array(
			
 
				+                    [len(merged_results['bbox'])])
			
 
				+
			
 
				+                merged_bboxs = []
			
 
				+                data['im_id'] = data['ori_im_id']
			
 
				+
			
 
				+                for key in ['im_shape', 'scale_factor', 'im_id']:
			
 
				+                    if isinstance(data, typing.Sequence):
			
 
				+                        merged_results[key] = data[0][key]
			
 
				+                    else:
			
 
				+                        merged_results[key] = data[key]
			
 
				+                for key, value in merged_results.items():
			
 
				+                    if hasattr(value, 'numpy'):
			
 
				+                        merged_results[key] = value.numpy()
			
 
				+                results.append(merged_results)
			
 
				+
			
 
				+        if visualize:
			
 
				+            for outs in results:
			
 
				+                batch_res = get_infer_results(outs, clsid2catid)
			
 
				+                bbox_num = outs['bbox_num']
			
 
				+                start = 0
			
 
				+                for i, im_id in enumerate(outs['im_id']):
			
 
				+                    image_path = imid2path[int(im_id)]
			
 
				+                    image = Image.open(image_path).convert('RGB')
			
 
				+                    image = ImageOps.exif_transpose(image)
			
 
				+                    self.status['original_image'] = np.array(image.copy())
			
 
				+                    end = start + bbox_num[i]
			
 
				+                    bbox_res = batch_res['bbox'][start:end] \
			
 
				+                            if 'bbox' in batch_res else None
			
 
				+                    mask_res, segm_res, keypoint_res = None, None, None
			
 
				+                    image = visualize_results(
			
 
				+                        image, bbox_res, mask_res, segm_res, keypoint_res,
			
 
				+                        int(im_id), catid2name, draw_threshold)
			
 
				+                    self.status['result_image'] = np.array(image.copy())
			
 
				+                    if self._compose_callback:
			
 
				+                        self._compose_callback.on_step_end(self.status)
			
 
				+                    # save image with detection
			
 
				+                    save_name = self._get_save_image_name(output_dir,
			
 
				+                                                          image_path)
			
 
				+                    logger.info("Detection bbox results save in {}".format(
			
 
				+                        save_name))
			
 
				+                    image.save(save_name, quality=95)
			
 
				+                    start = end
			
 
				+
			
 
				     def predict(self,
			
 
				                 images,
			
 
				                 draw_threshold=0.5,
			
 
				                 output_dir='output',
			
 
				-                save_txt=False):
			
 
				+                save_results=False,
			
 
				+                visualize=True):
			
 
				+        if not os.path.exists(output_dir):
			
 
				+            os.makedirs(output_dir)
			
 
				+
			
 
				         self.dataset.set_images(images)
			
 
				         loader = create('TestReader')(self.dataset, 0)
			
 
				 
			
 
				         imid2path = self.dataset.get_imid2path()
			
 
				 
			
 
				+        def setup_metrics_for_loader():
			
 
				+            # mem
			
 
				+            metrics = copy.deepcopy(self._metrics)
			
 
				+            mode = self.mode
			
 
				+            save_prediction_only = self.cfg[
			
 
				+                'save_prediction_only'] if 'save_prediction_only' in self.cfg else None
			
 
				+            output_eval = self.cfg[
			
 
				+                'output_eval'] if 'output_eval' in self.cfg else None
			
 
				+
			
 
				+            # modify
			
 
				+            self.mode = '_test'
			
 
				+            self.cfg['save_prediction_only'] = True
			
 
				+            self.cfg['output_eval'] = output_dir
			
 
				+            self.cfg['imid2path'] = imid2path
			
 
				+            self._init_metrics()
			
 
				+
			
 
				+            # restore
			
 
				+            self.mode = mode
			
 
				+            self.cfg.pop('save_prediction_only')
			
 
				+            if save_prediction_only is not None:
			
 
				+                self.cfg['save_prediction_only'] = save_prediction_only
			
 
				+
			
 
				+            self.cfg.pop('output_eval')
			
 
				+            if output_eval is not None:
			
 
				+                self.cfg['output_eval'] = output_eval
			
 
				+
			
 
				+            self.cfg.pop('imid2path')
			
 
				+
			
 
				+            _metrics = copy.deepcopy(self._metrics)
			
 
				+            self._metrics = metrics
			
 
				+
			
 
				+            return _metrics
			
 
				+
			
 
				+        if save_results:
			
 
				+            metrics = setup_metrics_for_loader()
			
 
				+        else:
			
 
				+            metrics = []
			
 
				+
			
 
				         anno_file = self.dataset.get_anno()
			
 
				         clsid2catid, catid2name = get_categories(
			
 
				             self.cfg.metric, anno_file=anno_file)
			
 
				 
			
 
				-        # Run Infer
			
 
				+        # Run Infer 
			
 
				         self.status['mode'] = 'test'
			
 
				         self.model.eval()
			
 
				         if self.cfg.get('print_flops', False):
			
 
				             flops_loader = create('TestReader')(self.dataset, 0)
			
 
				             self._flops(flops_loader)
			
 
				         results = []
			
 
				-        for step_id, data in enumerate(loader):
			
 
				+        for step_id, data in enumerate(tqdm(loader)):
			
 
				             self.status['step_id'] = step_id
			
 
				             # forward
			
 
				             outs = self.model(data)
			
 
				 
			
 
				+            for _m in metrics:
			
 
				+                _m.update(data, outs)
			
 
				+
			
 
				             for key in ['im_shape', 'scale_factor', 'im_id']:
			
 
				                 if isinstance(data, typing.Sequence):
			
 
				                     outs[key] = data[0][key]
			
@@ -548,64 +890,64 @@ class Trainer(object):
 
				                 if hasattr(value, 'numpy'):
			
 
				                     outs[key] = value.numpy()
			
 
				             results.append(outs)
			
 
				+
			
 
				         # sniper
			
 
				         if type(self.dataset) == SniperCOCODataSet:
			
 
				             results = self.dataset.anno_cropper.aggregate_chips_detections(
			
 
				                 results)
			
 
				 
			
 
				-        for outs in results:
			
 
				-            batch_res = get_infer_results(outs, clsid2catid)
			
 
				-            bbox_num = outs['bbox_num']
			
 
				-
			
 
				-            start = 0
			
 
				-            for i, im_id in enumerate(outs['im_id']):
			
 
				-                image_path = imid2path[int(im_id)]
			
 
				-                image = Image.open(image_path).convert('RGB')
			
 
				-                image = ImageOps.exif_transpose(image)
			
 
				-                self.status['original_image'] = np.array(image.copy())
			
 
				-
			
 
				-                end = start + bbox_num[i]
			
 
				-                bbox_res = batch_res['bbox'][start:end] \
			
 
				-                        if 'bbox' in batch_res else None
			
 
				-                mask_res = batch_res['mask'][start:end] \
			
 
				-                        if 'mask' in batch_res else None
			
 
				-                segm_res = batch_res['segm'][start:end] \
			
 
				-                        if 'segm' in batch_res else None
			
 
				-                keypoint_res = batch_res['keypoint'][start:end] \
			
 
				-                        if 'keypoint' in batch_res else None
			
 
				-                image = visualize_results(
			
 
				-                    image, bbox_res, mask_res, segm_res, keypoint_res,
			
 
				-                    int(im_id), catid2name, draw_threshold)
			
 
				-                self.status['result_image'] = np.array(image.copy())
			
 
				-                if self._compose_callback:
			
 
				-                    self._compose_callback.on_step_end(self.status)
			
 
				-                # save image with detection
			
 
				-                save_name = self._get_save_image_name(output_dir, image_path)
			
 
				-                logger.info("Detection bbox results save in {}".format(
			
 
				-                    save_name))
			
 
				-                image.save(save_name, quality=95)
			
 
				-                if save_txt:
			
 
				-                    save_path = os.path.splitext(save_name)[0] + '.txt'
			
 
				-                    results = {}
			
 
				-                    results["im_id"] = im_id
			
 
				-                    if bbox_res:
			
 
				-                        results["bbox_res"] = bbox_res
			
 
				-                    if keypoint_res:
			
 
				-                        results["keypoint_res"] = keypoint_res
			
 
				-                    save_result(save_path, results, catid2name, draw_threshold)
			
 
				-                start = end
			
 
				+        for _m in metrics:
			
 
				+            _m.accumulate()
			
 
				+            _m.reset()
			
 
				+
			
 
				+        if visualize:
			
 
				+            for outs in results:
			
 
				+                batch_res = get_infer_results(outs, clsid2catid)
			
 
				+                bbox_num = outs['bbox_num']
			
 
				+
			
 
				+                start = 0
			
 
				+                for i, im_id in enumerate(outs['im_id']):
			
 
				+                    image_path = imid2path[int(im_id)]
			
 
				+                    image = Image.open(image_path).convert('RGB')
			
 
				+                    image = ImageOps.exif_transpose(image)
			
 
				+                    self.status['original_image'] = np.array(image.copy())
			
 
				+
			
 
				+                    end = start + bbox_num[i]
			
 
				+                    bbox_res = batch_res['bbox'][start:end] \
			
 
				+                            if 'bbox' in batch_res else None
			
 
				+                    mask_res = batch_res['mask'][start:end] \
			
 
				+                            if 'mask' in batch_res else None
			
 
				+                    segm_res = batch_res['segm'][start:end] \
			
 
				+                            if 'segm' in batch_res else None
			
 
				+                    keypoint_res = batch_res['keypoint'][start:end] \
			
 
				+                            if 'keypoint' in batch_res else None
			
 
				+                    image = visualize_results(
			
 
				+                        image, bbox_res, mask_res, segm_res, keypoint_res,
			
 
				+                        int(im_id), catid2name, draw_threshold)
			
 
				+                    self.status['result_image'] = np.array(image.copy())
			
 
				+                    if self._compose_callback:
			
 
				+                        self._compose_callback.on_step_end(self.status)
			
 
				+                    # save image with detection
			
 
				+                    save_name = self._get_save_image_name(output_dir,
			
 
				+                                                          image_path)
			
 
				+                    logger.info("Detection bbox results save in {}".format(
			
 
				+                        save_name))
			
 
				+                    image.save(save_name, quality=95)
			
 
				+
			
 
				+                    start = end
			
 
				 
			
 
				     def _get_save_image_name(self, output_dir, image_path):
			
 
				         """
			
 
				         Get save image name from source image path.
			
 
				         """
			
 
				-        if not os.path.exists(output_dir):
			
 
				-            os.makedirs(output_dir)
			
 
				         image_name = os.path.split(image_path)[-1]
			
 
				         name, ext = os.path.splitext(image_name)
			
 
				         return os.path.join(output_dir, "{}".format(name)) + ext
			
 
				 
			
 
				-    def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
			
 
				+    def _get_infer_cfg_and_input_spec(self,
			
 
				+                                      save_dir,
			
 
				+                                      prune_input=True,
			
 
				+                                      kl_quant=False):
			
 
				         image_shape = None
			
 
				         im_shape = [None, 2]
			
 
				         scale_factor = [None, 2]
			
@@ -628,9 +970,27 @@ class Trainer(object):
 
				 
			
 
				         if hasattr(self.model, 'deploy'):
			
 
				             self.model.deploy = True
			
 
				+
			
 
				+        if 'slim' not in self.cfg:
			
 
				+            for layer in self.model.sublayers():
			
 
				+                if hasattr(layer, 'convert_to_deploy'):
			
 
				+                    layer.convert_to_deploy()
			
 
				+
			
 
				+        export_post_process = self.cfg['export'].get(
			
 
				+            'post_process', False) if hasattr(self.cfg, 'export') else True
			
 
				+        export_nms = self.cfg['export'].get('nms', False) if hasattr(
			
 
				+            self.cfg, 'export') else True
			
 
				+        export_benchmark = self.cfg['export'].get(
			
 
				+            'benchmark', False) if hasattr(self.cfg, 'export') else False
			
 
				         if hasattr(self.model, 'fuse_norm'):
			
 
				             self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
			
 
				                                                               False)
			
 
				+        if hasattr(self.model, 'export_post_process'):
			
 
				+            self.model.export_post_process = export_post_process if not export_benchmark else False
			
 
				+        if hasattr(self.model, 'export_nms'):
			
 
				+            self.model.export_nms = export_nms if not export_benchmark else False
			
 
				+        if export_post_process and not export_benchmark:
			
 
				+            image_shape = [None] + image_shape[1:]
			
 
				 
			
 
				         # Save infer cfg
			
 
				         _dump_infer_config(self.cfg,
			
@@ -663,16 +1023,34 @@ class Trainer(object):
 
				             pruned_input_spec = input_spec
			
 
				 
			
 
				         # TODO: Hard code, delete it when support prune input_spec.
			
 
				-        if self.cfg.architecture == 'PicoDet':
			
 
				+        if self.cfg.architecture == 'PicoDet' and not export_post_process:
			
 
				             pruned_input_spec = [{
			
 
				                 "image": InputSpec(
			
 
				                     shape=image_shape, name='image')
			
 
				             }]
			
 
				+        if kl_quant:
			
 
				+            if self.cfg.architecture == 'PicoDet' or 'ppyoloe' in self.cfg.weights:
			
 
				+                pruned_input_spec = [{
			
 
				+                    "image": InputSpec(
			
 
				+                        shape=image_shape, name='image'),
			
 
				+                    "scale_factor": InputSpec(
			
 
				+                        shape=scale_factor, name='scale_factor')
			
 
				+                }]
			
 
				+            elif 'tinypose' in self.cfg.weights:
			
 
				+                pruned_input_spec = [{
			
 
				+                    "image": InputSpec(
			
 
				+                        shape=image_shape, name='image')
			
 
				+                }]
			
 
				 
			
 
				         return static_model, pruned_input_spec
			
 
				 
			
 
				     def export(self, output_dir='output_inference'):
			
 
				         self.model.eval()
			
 
				+
			
 
				+        if hasattr(self.cfg, 'export') and 'fuse_conv_bn' in self.cfg[
			
 
				+                'export'] and self.cfg['export']['fuse_conv_bn']:
			
 
				+            self.model = fuse_conv_bn(self.model)
			
 
				+
			
 
				         model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
			
 
				         save_dir = os.path.join(output_dir, model_name)
			
 
				         if not os.path.exists(save_dir):
			
@@ -682,7 +1060,7 @@ class Trainer(object):
 
				             save_dir)
			
 
				 
			
 
				         # dy2st and save model
			
 
				-        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
			
 
				+        if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
			
 
				             paddle.jit.save(
			
 
				                 static_model,
			
 
				                 os.path.join(save_dir, 'model'),
			
@@ -706,8 +1084,9 @@ class Trainer(object):
 
				                 break
			
 
				 
			
 
				         # TODO: support prune input_spec
			
 
				+        kl_quant = True if hasattr(self.cfg.slim, 'ptq') else False
			
 
				         _, pruned_input_spec = self._get_infer_cfg_and_input_spec(
			
 
				-            save_dir, prune_input=False)
			
 
				+            save_dir, prune_input=False, kl_quant=kl_quant)
			
 
				 
			
 
				         self.cfg.slim.save_quantized_model(
			
 
				             self.model,
			
@@ -739,3 +1118,29 @@ class Trainer(object):
 
				         flops = flops(self.model, input_spec) / (1000**3)
			
 
				         logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
			
 
				             flops, input_data['image'][0].unsqueeze(0).shape))
			
 
				+
			
 
				+    def parse_mot_images(self, cfg):
			
 
				+        import glob
			
 
				+        # for quant
			
 
				+        dataset_dir = cfg['EvalMOTDataset'].dataset_dir
			
 
				+        data_root = cfg['EvalMOTDataset'].data_root
			
 
				+        data_root = '{}/{}'.format(dataset_dir, data_root)
			
 
				+        seqs = os.listdir(data_root)
			
 
				+        seqs.sort()
			
 
				+        all_images = []
			
 
				+        for seq in seqs:
			
 
				+            infer_dir = os.path.join(data_root, seq)
			
 
				+            assert infer_dir is None or os.path.isdir(infer_dir), \
			
 
				+                "{} is not a directory".format(infer_dir)
			
 
				+            images = set()
			
 
				+            exts = ['jpg', 'jpeg', 'png', 'bmp']
			
 
				+            exts += [ext.upper() for ext in exts]
			
 
				+            for ext in exts:
			
 
				+                images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
			
 
				+            images = list(images)
			
 
				+            images.sort()
			
 
				+            assert len(images) > 0, "no image found in {}".format(infer_dir)
			
 
				+            all_images.extend(images)
			
 
				+            logger.info("Found {} inference images in total.".format(
			
 
				+                len(images)))
			
 
				+        return all_images
			
--- a/paddlers/models/ppdet/ext_op/README.md
+++ b/paddlers/models/ppdet/ext_op/README.md
@@ -0,0 +1,35 @@
 
				+# 自定义OP编译
			
 
				+旋转框IoU计算OP参考[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html)文档实现。
			
 
				+
			
 
				+## 1. 环境依赖
			
 
				+- Paddle >= 2.0.1
			
 
				+- gcc 8.2
			
 
				+
			
 
				+## 2. 安装
			
 
				+```
			
 
				+python setup.py install
			
 
				+```
			
 
				+
			
 
				+编译完成后即可使用，以下为`rbox_iou`的使用示例
			
 
				+```
			
 
				+import numpy as np
				+import paddle
				+# 引入自定义op
				+from ext_op import rbox_iou
			
 
				+
			
 
				+paddle.set_device('gpu:0')
			
 
				+paddle.disable_static()
			
 
				+
			
 
				+rbox1 = np.random.rand(13000, 5)
			
 
				+rbox2 = np.random.rand(7, 5)
			
 
				+
			
 
				+pd_rbox1 = paddle.to_tensor(rbox1)
			
 
				+pd_rbox2 = paddle.to_tensor(rbox2)
			
 
				+
			
 
				+iou = rbox_iou(pd_rbox1, pd_rbox2)
			
 
				+print('iou', iou)
			
 
				+```
			
 
				+
			
 
				+## 3. 单元测试
			
 
				+可以通过执行单元测试来确认自定义算子功能的正确性，执行单元测试的示例如下所示：
			
 
				+```
			
 
				+python unittest/test_matched_rbox_iou.py
			
 
				+```
			
--- a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc
@@ -0,0 +1,90 @@
 
				+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+//
			
 
				+// The code is based on
			
 
				+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
			
 
				+
			
 
				+#include "paddle/extension.h"
			
 
				+#include "rbox_iou_op.h"
			
 
				+
			
 
				+// Computes the IoU of the i-th box of the first input with the i-th box of
				+// the second input and stores it in output_data_ptr[i]. Each box occupies
				+// 5 consecutive values in its input buffer.
				+template <typename T>
				+void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
				+                            const T *rbox2_data_ptr, T *output_data_ptr) {
				+  // Advance both box cursors in lockstep, one 5-value box per iteration.
				+  const T *box1 = rbox1_data_ptr;
				+  const T *box2 = rbox2_data_ptr;
				+  for (int idx = 0; idx < rbox_num; ++idx, box1 += 5, box2 += 5) {
				+    output_data_ptr[idx] = rbox_iou_single<T>(box1, box2);
				+  }
				+}
			
 
				+
			
 
				+#define CHECK_INPUT_CPU(x)                                                     \
			
 
				+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
			
 
				+
			
 
				+// CPU forward of matched_rbox_iou: pairs the i-th box of rbox1 with the i-th
				+// box of rbox2 and returns one tensor of shape {rbox_num} holding the IoUs.
				+// Both inputs must be CPU tensors with the same first dimension; the kernel
				+// reads 5 values per box.
				+std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
				+                                                 const paddle::Tensor &rbox2) {
				+  CHECK_INPUT_CPU(rbox1);
				+  CHECK_INPUT_CPU(rbox2);
				+  PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
				+
				+  auto rbox_num = rbox1.shape()[0];
				+  auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
				+
				+  // The label names this call site in dispatch errors, so it carries the
				+  // name of the kernel actually launched (matching the CUDA variant).
				+  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "matched_rbox_iou_cpu_kernel", ([&] {
				+                               matched_rbox_iou_cpu_kernel<data_t>(
				+                                   rbox_num, rbox1.data<data_t>(),
				+                                   rbox2.data<data_t>(),
				+                                   output.mutable_data<data_t>());
				+                             }));
				+
				+  return {output};
				+}
			
 
				+
			
 
				+#ifdef PADDLE_WITH_CUDA
			
 
				+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
			
 
				+                                                  const paddle::Tensor &rbox2);
			
 
				+#endif
			
 
				+
			
 
				+// Fails unless both tensors report the same place.
				+#define CHECK_INPUT_SAME(x1, x2)                                               \
				+  PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
			
 
				+
			
 
				+// Kernel entry of the matched_rbox_iou op: dispatches to the CPU or (when
				+// built with CUDA) the GPU implementation according to the place of rbox1.
				+// Both inputs must be on the same place.
				+std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
				+                                              const paddle::Tensor &rbox2) {
				+  CHECK_INPUT_SAME(rbox1, rbox2);
				+  if (rbox1.place() == paddle::PlaceType::kCPU) {
				+    return MatchedRboxIouCPUForward(rbox1, rbox2);
				+#ifdef PADDLE_WITH_CUDA
				+  } else if (rbox1.place() == paddle::PlaceType::kGPU) {
				+    return MatchedRboxIouCUDAForward(rbox1, rbox2);
				+#endif
				+  }
				+  // Reaching this point means no branch returned (e.g. a GPU tensor in a
				+  // CPU-only build). Fail loudly instead of falling off the end of a
				+  // non-void function, which is undefined behaviour.
				+  PD_THROW("matched_rbox_iou: unsupported place, expected a CPU or GPU "
				+           "tensor.");
				+}
			
 
				+
			
 
				+std::vector<std::vector<int64_t>>
			
 
				+MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape,
			
 
				+                     std::vector<int64_t> rbox2_shape) {
			
 
				+  return {{rbox1_shape[0]}};
			
 
				+}
			
 
				+
			
 
				+std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1,
			
 
				+                                                   paddle::DataType t2) {
			
 
				+  return {t1};
			
 
				+}
			
 
				+
			
 
				+PD_BUILD_OP(matched_rbox_iou)
			
 
				+    .Inputs({"RBOX1", "RBOX2"})
			
 
				+    .Outputs({"Output"})
			
 
				+    .SetKernelFn(PD_KERNEL(MatchedRboxIouForward))
			
 
				+    .SetInferShapeFn(PD_INFER_SHAPE(MatchedRboxIouInferShape))
			
 
				+    .SetInferDtypeFn(PD_INFER_DTYPE(MatchedRboxIouInferDtype));
			
--- a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu
@@ -0,0 +1,63 @@
 
				+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+//
			
 
				+// The code is based on
			
 
				+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
			
 
				+
			
 
				+#include "paddle/extension.h"
			
 
				+#include "rbox_iou_op.h"
			
 
				+
			
 
				+/**
			
 
				+   Computes ceil(a / b)
			
 
				+*/
			
 
				+
			
 
				+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
			
 
				+
			
 
				+// Each thread starts at its global index and advances by the total number
				+// of launched threads, writing the IoU of the idx-th box pair (5 values per
				+// box) to output_data_ptr[idx].
				+template <typename T>
				+__global__ void
				+matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
				+                        const T *rbox2_data_ptr, T *output_data_ptr) {
				+  const int stride = blockDim.x * gridDim.x;
				+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
				+  while (idx < rbox_num) {
				+    const T *box1 = rbox1_data_ptr + idx * 5;
				+    const T *box2 = rbox2_data_ptr + idx * 5;
				+    output_data_ptr[idx] = rbox_iou_single<T>(box1, box2);
				+    idx += stride;
				+  }
				+}
			
 
				+
			
 
				+#define CHECK_INPUT_GPU(x)                                                     \
			
 
				+  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
			
 
				+
			
 
				+// GPU forward of matched_rbox_iou: pairs the i-th box of rbox1 with the i-th
				+// box of rbox2 and returns one tensor of shape {rbox_num} holding the IoUs.
				+// Both inputs must be GPU tensors with the same first dimension. The kernel
				+// is launched on the stream of rbox1.
				+std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
				+                                                  const paddle::Tensor &rbox2) {
				+  CHECK_INPUT_GPU(rbox1);
				+  CHECK_INPUT_GPU(rbox2);
				+  PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
				+
				+  auto rbox_num = rbox1.shape()[0];
				+
				+  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
				+
				+  const int thread_per_block = 512;
				+  // A grid of 0 blocks is an invalid launch configuration, so an empty
				+  // input still launches one block; the kernel's bounds check then makes
				+  // every thread exit without touching memory.
				+  const int block_per_grid =
				+      rbox_num > 0 ? CeilDiv(rbox_num, thread_per_block) : 1;
				+
				+  PD_DISPATCH_FLOATING_TYPES(
				+      rbox1.type(), "matched_rbox_iou_cuda_kernel", ([&] {
				+        matched_rbox_iou_cuda_kernel<
				+            data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>(
				+            rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
				+            output.mutable_data<data_t>());
				+      }));
				+
				+  return {output};
				+}
			
--- a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
@@ -0,0 +1,97 @@
 
				+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+//
			
 
				+// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
			
 
				+
			
 
				+#include "rbox_iou_op.h"
			
 
				+#include "paddle/extension.h"
			
 
				+
			
 
				+
			
 
				+// Fills the row-major rbox1_num x rbox2_num output with the IoU of every
				+// (rbox1[row], rbox2[col]) pair. Each box occupies 5 consecutive values.
				+template <typename T>
				+void rbox_iou_cpu_kernel(
				+    const int rbox1_num,
				+    const int rbox2_num,
				+    const T* rbox1_data_ptr,
				+    const T* rbox2_data_ptr,
				+    T* output_data_ptr) {
				+
				+    for (int row = 0; row < rbox1_num; ++row) {
				+        const T* box1 = rbox1_data_ptr + row * 5;
				+        T* out_row = output_data_ptr + row * rbox2_num;
				+        for (int col = 0; col < rbox2_num; ++col) {
				+            out_row[col] = rbox_iou_single<T>(box1, rbox2_data_ptr + col * 5);
				+        }
				+    }
				+}
			
 
				+
			
 
				+
			
 
				+#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
			
 
				+
			
 
				+std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
			
 
				+    CHECK_INPUT_CPU(rbox1);
			
 
				+    CHECK_INPUT_CPU(rbox2);
			
 
				+
			
 
				+    auto rbox1_num = rbox1.shape()[0];
			
 
				+    auto rbox2_num = rbox2.shape()[0];
			
 
				+
			
 
				+    auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num});
			
 
				+
			
 
				+    PD_DISPATCH_FLOATING_TYPES(
			
 
				+        rbox1.type(),
			
 
				+        "rbox_iou_cpu_kernel",
			
 
				+        ([&] {
			
 
				+            rbox_iou_cpu_kernel<data_t>(
			
 
				+                rbox1_num,
			
 
				+                rbox2_num,
			
 
				+                rbox1.data<data_t>(),
			
 
				+                rbox2.data<data_t>(),
			
 
				+                output.mutable_data<data_t>());
			
 
				+        }));
			
 
				+    
			
 
				+    return {output};
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#ifdef PADDLE_WITH_CUDA
			
 
				+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+// Fails unless both tensors report the same place.
				+#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
			
 
				+
			
 
				+// Kernel entry of the rbox_iou op: dispatches to the CPU or (when built with
				+// CUDA) the GPU implementation according to the place of rbox1. Both inputs
				+// must be on the same place.
				+std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
				+    CHECK_INPUT_SAME(rbox1, rbox2);
				+    if (rbox1.place() == paddle::PlaceType::kCPU) {
				+        return RboxIouCPUForward(rbox1, rbox2);
				+#ifdef PADDLE_WITH_CUDA
				+    } else if (rbox1.place() == paddle::PlaceType::kGPU) {
				+        return RboxIouCUDAForward(rbox1, rbox2);
				+#endif
				+    }
				+    // Reaching this point means no branch returned (e.g. a GPU tensor in a
				+    // CPU-only build). Fail loudly instead of falling off the end of a
				+    // non-void function, which is undefined behaviour.
				+    PD_THROW("rbox_iou: unsupported place, expected a CPU or GPU tensor.");
				+}
			
 
				+
			
 
				+std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) {
			
 
				+    return {{rbox1_shape[0], rbox2_shape[0]}};
			
 
				+}
			
 
				+
			
 
				+std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) {
			
 
				+    return {t1};
			
 
				+}
			
 
				+
			
 
				+PD_BUILD_OP(rbox_iou)
			
 
				+    .Inputs({"RBOX1", "RBOX2"})
			
 
				+    .Outputs({"Output"})
			
 
				+    .SetKernelFn(PD_KERNEL(RboxIouForward))
			
 
				+    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
			
 
				+    .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
			
--- a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu
@@ -0,0 +1,114 @@
 
				+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+//
			
 
				+// The code is based on
			
 
				+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
			
 
				+
			
 
				+#include "paddle/extension.h"
			
 
				+#include "rbox_iou_op.h"
			
 
				+
			
 
				+// 2D block with 32 * 16 = 512 threads per block
			
 
				+const int BLOCK_DIM_X = 32;
			
 
				+const int BLOCK_DIM_Y = 16;
			
 
				+
			
 
				+/**
			
 
				+   Computes ceil(a / b)
			
 
				+*/
			
 
				+
			
 
				+static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
			
 
				+
			
 
				+// Each block covers a tile of up to blockDim.x boxes of rbox1 by blockDim.y
				+// boxes of rbox2. The tile's boxes are staged in shared memory, then every
				+// in-range thread writes the IoU of one (rbox1, rbox2) pair to the
				+// row-major rbox1_num x rbox2_num output. Each box occupies 5 values.
				+template <typename T>
				+__global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
				+                                     const T *rbox1_data_ptr,
				+                                     const T *rbox2_data_ptr,
				+                                     T *output_data_ptr) {
				+
				+  // Index of the first box handled by this block along each axis.
				+  const int row_base = blockIdx.x * blockDim.x;
				+  const int col_base = blockIdx.y * blockDim.y;
				+
				+  // Number of valid boxes in this tile (edge tiles may be partial).
				+  const int row_count = min(rbox1_num - row_base, blockDim.x);
				+  const int col_count = min(rbox2_num - col_base, blockDim.y);
				+
				+  __shared__ T block_boxes1[BLOCK_DIM_X * 5];
				+  __shared__ T block_boxes2[BLOCK_DIM_Y * 5];
				+
				+  // Only the threads with threadIdx.y == 0 stage data. Indexing both tiles
				+  // by threadIdx.x is safe since BLOCK_DIM_X >= BLOCK_DIM_Y.
				+  if (threadIdx.y == 0) {
				+    if (threadIdx.x < row_count) {
				+      const T *src = rbox1_data_ptr + (row_base + threadIdx.x) * 5;
				+      T *dst = block_boxes1 + threadIdx.x * 5;
				+      for (int k = 0; k < 5; ++k) {
				+        dst[k] = src[k];
				+      }
				+    }
				+    if (threadIdx.x < col_count) {
				+      const T *src = rbox2_data_ptr + (col_base + threadIdx.x) * 5;
				+      T *dst = block_boxes2 + threadIdx.x * 5;
				+      for (int k = 0; k < 5; ++k) {
				+        dst[k] = src[k];
				+      }
				+    }
				+  }
				+
				+  // Staged boxes must be visible to the whole block before they are read.
				+  __syncthreads();
				+
				+  if (threadIdx.x < row_count && threadIdx.y < col_count) {
				+    int offset = (row_base + threadIdx.x) * rbox2_num + col_base +
				+                 threadIdx.y;
				+    const T *box1 = block_boxes1 + threadIdx.x * 5;
				+    const T *box2 = block_boxes2 + threadIdx.y * 5;
				+    output_data_ptr[offset] = rbox_iou_single<T>(box1, box2);
				+  }
				+}
			
 
				+
			
 
				+#define CHECK_INPUT_GPU(x)                                                     \
			
 
				+  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
			
 
				+
			
 
				+// GPU forward of rbox_iou: returns one tensor of shape
				+// {rbox1_num, rbox2_num} holding the IoU of every (rbox1, rbox2) pair.
				+// Both inputs must be GPU tensors. The kernel is launched on the stream of
				+// rbox1 with BLOCK_DIM_X x BLOCK_DIM_Y threads per block.
				+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
				+                                               const paddle::Tensor &rbox2) {
				+  CHECK_INPUT_GPU(rbox1);
				+  CHECK_INPUT_GPU(rbox2);
				+
				+  auto rbox1_num = rbox1.shape()[0];
				+  auto rbox2_num = rbox2.shape()[0];
				+
				+  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num});
				+
				+  // A grid dimension of 0 is an invalid launch configuration, so an empty
				+  // input still launches one block along that axis; the kernel's per-tile
				+  // bounds checks then keep every thread from touching memory.
				+  const int blocks_x = rbox1_num > 0 ? CeilDiv(rbox1_num, BLOCK_DIM_X) : 1;
				+  const int blocks_y = rbox2_num > 0 ? CeilDiv(rbox2_num, BLOCK_DIM_Y) : 1;
				+
				+  dim3 blocks(blocks_x, blocks_y);
				+  dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
				+
				+  PD_DISPATCH_FLOATING_TYPES(
				+      rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
				+        rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>(
				+            rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
				+            output.mutable_data<data_t>());
				+      }));
				+
				+  return {output};
				+}
			
--- a/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
+++ b/paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h
@@ -0,0 +1,348 @@
 
				+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+//
			
 
				+// The code is based on
			
 
				+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <cmath>
			
 
				+#include <vector>
			
 
				+
			
 
				+#ifdef __CUDACC__
			
 
				+// Designates functions callable from the host (CPU) and the device (GPU)
			
 
				+#define HOST_DEVICE __host__ __device__
			
 
				+#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
			
 
				+#else
			
 
				+#include <algorithm>
			
 
				+#define HOST_DEVICE
			
 
				+#define HOST_DEVICE_INLINE HOST_DEVICE inline
			
 
				+#endif
			
 
				+
			
 
				+namespace {
			
 
				+
			
 
				+template <typename T> struct RotatedBox { T x_ctr, y_ctr, w, h, a; };
			
 
				+
			
 
				+template <typename T> struct Point {
			
 
				+  T x, y;
			
 
				+  HOST_DEVICE_INLINE Point(const T &px = 0, const T &py = 0) : x(px), y(py) {}
			
 
				+  HOST_DEVICE_INLINE Point operator+(const Point &p) const {
			
 
				+    return Point(x + p.x, y + p.y);
			
 
				+  }
			
 
				+  HOST_DEVICE_INLINE Point &operator+=(const Point &p) {
			
 
				+    x += p.x;
			
 
				+    y += p.y;
			
 
				+    return *this;
			
 
				+  }
			
 
				+  HOST_DEVICE_INLINE Point operator-(const Point &p) const {
			
 
				+    return Point(x - p.x, y - p.y);
			
 
				+  }
			
 
				+  HOST_DEVICE_INLINE Point operator*(const T coeff) const {
			
 
				+    return Point(x * coeff, y * coeff);
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE T dot_2d(const Point<T> &A, const Point<T> &B) {
			
 
				+  return A.x * B.x + A.y * B.y;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE T cross_2d(const Point<T> &A, const Point<T> &B) {
			
 
				+  return A.x * B.y - B.x * A.y;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T> &box,
			
 
				+                                             Point<T> (&pts)[4]) {
			
 
				+  // M_PI / 180. == 0.01745329251
			
 
				+  // double theta = box.a * 0.01745329251;
			
 
				+  // MODIFIED
			
 
				+  double theta = box.a;
			
 
				+  T cosTheta2 = (T)cos(theta) * 0.5f;
			
 
				+  T sinTheta2 = (T)sin(theta) * 0.5f;
			
 
				+
			
 
				+  // y: top --> down; x: left --> right
			
 
				+  pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
			
 
				+  pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
			
 
				+  pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
			
 
				+  pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
			
 
				+  pts[2].x = 2 * box.x_ctr - pts[0].x;
			
 
				+  pts[2].y = 2 * box.y_ctr - pts[0].y;
			
 
				+  pts[3].x = 2 * box.x_ctr - pts[1].x;
			
 
				+  pts[3].y = 2 * box.y_ctr - pts[1].y;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
			
 
				+                                               const Point<T> (&pts2)[4],
			
 
				+                                               Point<T> (&intersections)[24]) {
			
 
				+  // Line vector
			
 
				+  // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
			
 
				+  Point<T> vec1[4], vec2[4];
			
 
				+  for (int i = 0; i < 4; i++) {
			
 
				+    vec1[i] = pts1[(i + 1) % 4] - pts1[i];
			
 
				+    vec2[i] = pts2[(i + 1) % 4] - pts2[i];
			
 
				+  }
			
 
				+
			
 
				+  // Line test - test all line combos for intersection
			
 
				+  int num = 0; // number of intersections
			
 
				+  for (int i = 0; i < 4; i++) {
			
 
				+    for (int j = 0; j < 4; j++) {
			
 
				+      // Solve for 2x2 Ax=b
			
 
				+      T det = cross_2d<T>(vec2[j], vec1[i]);
			
 
				+
			
 
				+      // This takes care of parallel lines
			
 
				+      if (fabs(det) <= 1e-14) {
			
 
				+        continue;
			
 
				+      }
			
 
				+
			
 
				+      auto vec12 = pts2[j] - pts1[i];
			
 
				+
			
 
				+      T t1 = cross_2d<T>(vec2[j], vec12) / det;
			
 
				+      T t2 = cross_2d<T>(vec1[i], vec12) / det;
			
 
				+
			
 
				+      if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
			
 
				+        intersections[num++] = pts1[i] + vec1[i] * t1;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Check for vertices of rect1 inside rect2
			
 
				+  {
			
 
				+    const auto &AB = vec2[0];
			
 
				+    const auto &DA = vec2[3];
			
 
				+    auto ABdotAB = dot_2d<T>(AB, AB);
			
 
				+    auto ADdotAD = dot_2d<T>(DA, DA);
			
 
				+    for (int i = 0; i < 4; i++) {
			
 
				+      // assume ABCD is the rectangle, and P is the point to be judged
			
 
				+      // P is inside ABCD iff. P's projection on AB lies within AB
			
 
				+      // and P's projection on AD lies within AD
			
 
				+
			
 
				+      auto AP = pts1[i] - pts2[0];
			
 
				+
			
 
				+      auto APdotAB = dot_2d<T>(AP, AB);
			
 
				+      auto APdotAD = -dot_2d<T>(AP, DA);
			
 
				+
			
 
				+      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
			
 
				+          (APdotAD <= ADdotAD)) {
			
 
				+        intersections[num++] = pts1[i];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Reverse the check - check for vertices of rect2 inside rect1
			
 
				+  {
			
 
				+    const auto &AB = vec1[0];
			
 
				+    const auto &DA = vec1[3];
			
 
				+    auto ABdotAB = dot_2d<T>(AB, AB);
			
 
				+    auto ADdotAD = dot_2d<T>(DA, DA);
			
 
				+    for (int i = 0; i < 4; i++) {
			
 
				+      auto AP = pts2[i] - pts1[0];
			
 
				+
			
 
				+      auto APdotAB = dot_2d<T>(AP, AB);
			
 
				+      auto APdotAD = -dot_2d<T>(AP, DA);
			
 
				+
			
 
				+      if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
			
 
				+          (APdotAD <= ADdotAD)) {
			
 
				+        intersections[num++] = pts2[i];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  return num;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
			
 
				+                                          const int &num_in, Point<T> (&q)[24],
			
 
				+                                          bool shift_to_zero = false) {
			
 
				+  assert(num_in >= 2);
			
 
				+
			
 
				+  // Step 1:
			
 
				+  // Find point with minimum y
			
 
				+  // if more than 1 points have the same minimum y,
			
 
				+  // pick the one with the minimum x.
			
 
				+  int t = 0;
			
 
				+  for (int i = 1; i < num_in; i++) {
			
 
				+    if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
			
 
				+      t = i;
			
 
				+    }
			
 
				+  }
			
 
				+  auto &start = p[t]; // starting point
			
 
				+
			
 
				+  // Step 2:
			
 
				+  // Subtract starting point from every points (for sorting in the next step)
			
 
				+  for (int i = 0; i < num_in; i++) {
			
 
				+    q[i] = p[i] - start;
			
 
				+  }
			
 
				+
			
 
				+  // Swap the starting point to position 0
			
 
				+  auto tmp = q[0];
			
 
				+  q[0] = q[t];
			
 
				+  q[t] = tmp;
			
 
				+
			
 
				+  // Step 3:
			
 
				+  // Sort point 1 ~ num_in according to their relative cross-product values
			
 
				+  // (essentially sorting according to angles)
			
 
				+  // If the angles are the same, sort according to their distance to origin
			
 
				+  T dist[24];
			
 
				+  for (int i = 0; i < num_in; i++) {
			
 
				+    dist[i] = dot_2d<T>(q[i], q[i]);
			
 
				+  }
			
 
				+
			
 
				+#ifdef __CUDACC__
			
 
				+  // CUDA version
			
 
				+  // In the future, we can potentially use thrust
			
 
				+  // for sorting here to improve speed (though not guaranteed)
			
 
				+  for (int i = 1; i < num_in - 1; i++) {
			
 
				+    for (int j = i + 1; j < num_in; j++) {
			
 
				+      T crossProduct = cross_2d<T>(q[i], q[j]);
			
 
				+      if ((crossProduct < -1e-6) ||
			
 
				+          (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
			
 
				+        auto q_tmp = q[i];
			
 
				+        q[i] = q[j];
			
 
				+        q[j] = q_tmp;
			
 
				+        auto dist_tmp = dist[i];
			
 
				+        dist[i] = dist[j];
			
 
				+        dist[j] = dist_tmp;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+#else
			
 
				+  // CPU version
			
 
				+  std::sort(q + 1, q + num_in,
			
 
				+            [](const Point<T> &A, const Point<T> &B) -> bool {
			
 
				+              T temp = cross_2d<T>(A, B);
			
 
				+              if (fabs(temp) < 1e-6) {
			
 
				+                return dot_2d<T>(A, A) < dot_2d<T>(B, B);
			
 
				+              } else {
			
 
				+                return temp > 0;
			
 
				+              }
			
 
				+            });
			
 
				+#endif
			
 
				+
			
 
				+  // Step 4:
			
 
				+  // Make sure there are at least 2 points (that don't overlap with each other)
			
 
				+  // in the stack
			
 
				+  int k; // index of the non-overlapped second point
			
 
				+  for (k = 1; k < num_in; k++) {
			
 
				+    if (dist[k] > 1e-8) {
			
 
				+      break;
			
 
				+    }
			
 
				+  }
			
 
				+  if (k == num_in) {
			
 
				+    // We reach the end, which means the convex hull is just one point
			
 
				+    q[0] = p[t];
			
 
				+    return 1;
			
 
				+  }
			
 
				+  q[1] = q[k];
			
 
				+  int m = 2; // 2 points in the stack
			
 
				+  // Step 5:
			
 
				+  // Finally we can start the scanning process.
			
 
				+  // When a non-convex relationship between the 3 points is found
			
 
				+  // (either concave shape or duplicated points),
			
 
				+  // we pop the previous point from the stack
			
 
				+  // until the 3-point relationship is convex again, or
			
 
				+  // until the stack only contains two points
			
 
				+  for (int i = k + 1; i < num_in; i++) {
			
 
				+    while (m > 1 && cross_2d<T>(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
			
 
				+      m--;
			
 
				+    }
			
 
				+    q[m++] = q[i];
			
 
				+  }
			
 
				+
			
 
				+  // Step 6 (Optional):
			
 
				+  // In general sense we need the original coordinates, so we
			
 
				+  // need to shift the points back (reverting Step 2)
			
 
				+  // But if we're only interested in getting the area/perimeter of the shape
			
 
				+  // We can simply return.
			
 
				+  if (!shift_to_zero) {
			
 
				+    for (int i = 0; i < m; i++) {
			
 
				+      q[i] += start;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  return m;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int &m) {
			
 
				+  if (m <= 2) {
			
 
				+    return 0;
			
 
				+  }
			
 
				+
			
 
				+  T area = 0;
			
 
				+  for (int i = 1; i < m - 1; i++) {
			
 
				+    area += fabs(cross_2d<T>(q[i] - q[0], q[i + 1] - q[0]));
			
 
				+  }
			
 
				+
			
 
				+  return area / 2.0;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE T rboxes_intersection(const RotatedBox<T> &box1,
			
 
				+                                         const RotatedBox<T> &box2) {
			
 
				+  // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
			
 
				+  // from rotated_rect_intersection_pts
			
 
				+  Point<T> intersectPts[24], orderedPts[24];
			
 
				+
			
 
				+  Point<T> pts1[4];
			
 
				+  Point<T> pts2[4];
			
 
				+  get_rotated_vertices<T>(box1, pts1);
			
 
				+  get_rotated_vertices<T>(box2, pts2);
			
 
				+
			
 
				+  int num = get_intersection_points<T>(pts1, pts2, intersectPts);
			
 
				+
			
 
				+  if (num <= 2) {
			
 
				+    return 0.0;
			
 
				+  }
			
 
				+
			
 
				+  // Convex Hull to order the intersection points in clockwise order and find
			
 
				+  // the contour area.
			
 
				+  int num_convex = convex_hull_graham<T>(intersectPts, num, orderedPts, true);
			
 
				+  return polygon_area<T>(orderedPts, num_convex);
			
 
				+}
			
 
				+
			
 
				+} // namespace
			
 
				+
			
 
				+template <typename T>
			
 
				+HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw,
			
 
				+                                     T const *const box2_raw) {
			
 
				+  // shift center to the middle point to achieve higher precision in result
			
 
				+  RotatedBox<T> box1, box2;
			
 
				+  auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
			
 
				+  auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
			
 
				+  box1.x_ctr = box1_raw[0] - center_shift_x;
			
 
				+  box1.y_ctr = box1_raw[1] - center_shift_y;
			
 
				+  box1.w = box1_raw[2];
			
 
				+  box1.h = box1_raw[3];
			
 
				+  box1.a = box1_raw[4];
			
 
				+  box2.x_ctr = box2_raw[0] - center_shift_x;
			
 
				+  box2.y_ctr = box2_raw[1] - center_shift_y;
			
 
				+  box2.w = box2_raw[2];
			
 
				+  box2.h = box2_raw[3];
			
 
				+  box2.a = box2_raw[4];
			
 
				+
			
 
				+  const T area1 = box1.w * box1.h;
			
 
				+  const T area2 = box2.w * box2.h;
			
 
				+  if (area1 < 1e-14 || area2 < 1e-14) {
			
 
				+    return 0.f;
			
 
				+  }
			
 
				+
			
 
				+  const T intersection = rboxes_intersection<T>(box1, box2);
			
 
				+  const T iou = intersection / (area1 + area2 - intersection);
			
 
				+  return iou;
			
 
				+}
			
--- a/paddlers/models/ppdet/ext_op/setup.py
+++ b/paddlers/models/ppdet/ext_op/setup.py
@@ -0,0 +1,33 @@
 
				+import os
			
 
				+import glob
			
 
				+import paddle
			
 
				+from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
			
 
				+
			
 
				+
			
 
				+def get_extensions():
			
 
				+    root_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+    ext_root_dir = os.path.join(root_dir, 'csrc')
			
 
				+    sources = []
			
 
				+    for ext_name in os.listdir(ext_root_dir):
			
 
				+        ext_dir = os.path.join(ext_root_dir, ext_name)
			
 
				+        source = glob.glob(os.path.join(ext_dir, '*.cc'))
			
 
				+        kwargs = dict()
			
 
				+        if paddle.device.is_compiled_with_cuda():
			
 
				+            source += glob.glob(os.path.join(ext_dir, '*.cu'))
			
 
				+
			
 
				+        if not source:
			
 
				+            continue
			
 
				+
			
 
				+        sources += source
			
 
				+
			
 
				+    if paddle.device.is_compiled_with_cuda():
			
 
				+        extension = CUDAExtension(
			
 
				+            sources, extra_compile_args={'cxx': ['-DPADDLE_WITH_CUDA']})
			
 
				+    else:
			
 
				+        extension = CppExtension(sources)
			
 
				+
			
 
				+    return extension
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    setup(name='ext_op', ext_modules=get_extensions())
			
--- a/paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py
+++ b/paddlers/models/ppdet/ext_op/unittest/test_matched_rbox_iou.py
@@ -0,0 +1,149 @@
 
				+import numpy as np
			
 
				+import sys
			
 
				+import time
			
 
				+from shapely.geometry import Polygon
			
 
				+import paddle
			
 
				+import unittest
			
 
				+
			
 
				+from ext_op import matched_rbox_iou
			
 
				+
			
 
				+
			
 
				+def rbox2poly_single(rrect, get_best_begin_point=False):
			
 
				+    """
			
 
				+    rrect:[x_ctr,y_ctr,w,h,angle]
			
 
				+    to
			
 
				+    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
			
 
				+    """
			
 
				+    x_ctr, y_ctr, width, height, angle = rrect[:5]
			
 
				+    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
			
 
				+    # rect 2x4
			
 
				+    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
			
 
				+    R = np.array([[np.cos(angle), -np.sin(angle)],
			
 
				+                  [np.sin(angle), np.cos(angle)]])
			
 
				+    # poly
			
 
				+    poly = R.dot(rect)
			
 
				+    x0, x1, x2, x3 = poly[0, :4] + x_ctr
			
 
				+    y0, y1, y2, y3 = poly[1, :4] + y_ctr
			
 
				+    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
			
 
				+    return poly
			
 
				+
			
 
				+
			
 
				+def intersection(g, p):
			
 
				+    """
			
 
				+    Intersection.
			
 
				+    """
			
 
				+
			
 
				+    g = g[:8].reshape((4, 2))
			
 
				+    p = p[:8].reshape((4, 2))
			
 
				+
			
 
				+    a = g
			
 
				+    b = p
			
 
				+
			
 
				+    use_filter = True
			
 
				+    if use_filter:
			
 
				+        # step1:
			
 
				+        inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
			
 
				+        inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
			
 
				+        inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
			
 
				+        inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
			
 
				+        if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
			
 
				+            return 0.
			
 
				+        x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
			
 
				+        x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
			
 
				+        y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
			
 
				+        y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
			
 
				+        if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
			
 
				+            return 0.
			
 
				+
			
 
				+    g = Polygon(g)
			
 
				+    p = Polygon(p)
			
 
				+    if not g.is_valid or not p.is_valid:
			
 
				+        return 0
			
 
				+
			
 
				+    inter = Polygon(g).intersection(Polygon(p)).area
			
 
				+    union = g.area + p.area - inter
			
 
				+    if union == 0:
			
 
				+        return 0
			
 
				+    else:
			
 
				+        return inter / union
			
 
				+
			
 
				+
			
 
				+def matched_rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
			
 
				+    """
			
 
				+
			
 
				+    Args:
			
 
				+        anchors: [M, 5]  x1,y1,x2,y2,angle
			
 
				+        gt_bboxes: [M, 5]  x1,y1,x2,y2,angle
			
 
				+
			
 
				+    Returns:
			
 
				+        macthed_iou: [M]
			
 
				+    """
			
 
				+    assert anchors.shape[1] == 5
			
 
				+    assert gt_bboxes.shape[1] == 5
			
 
				+
			
 
				+    gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
			
 
				+    anchors_ploy = [rbox2poly_single(e) for e in anchors]
			
 
				+
			
 
				+    num = len(anchors_ploy)
			
 
				+    iou = np.zeros((num, ), dtype=np.float64)
			
 
				+
			
 
				+    start_time = time.time()
			
 
				+    for i in range(num):
			
 
				+        try:
			
 
				+            iou[i] = intersection(gt_bboxes_ploy[i], anchors_ploy[i])
			
 
				+        except Exception as e:
			
 
				+            print('cur gt_bboxes_ploy[i]', gt_bboxes_ploy[i], 'anchors_ploy[j]',
			
 
				+                  anchors_ploy[i], e)
			
 
				+    return iou
			
 
				+
			
 
				+
			
 
				+def gen_sample(n):
			
 
				+    rbox = np.random.rand(n, 5)
			
 
				+    rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
			
 
				+    rbox[:, 4] = rbox[:, 4] - 0.5
			
 
				+    return rbox
			
 
				+
			
 
				+
			
 
				+class MatchedRBoxIoUTest(unittest.TestCase):
			
 
				+    def setUp(self):
			
 
				+        self.initTestCase()
			
 
				+        self.rbox1 = gen_sample(self.n)
			
 
				+        self.rbox2 = gen_sample(self.n)
			
 
				+
			
 
				+    def initTestCase(self):
			
 
				+        self.n = 1000
			
 
				+
			
 
				+    def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
			
 
				+        self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
			
 
				+
			
 
				+    def get_places(self):
			
 
				+        places = [paddle.CPUPlace()]
			
 
				+        if paddle.device.is_compiled_with_cuda():
			
 
				+            places.append(paddle.CUDAPlace(0))
			
 
				+
			
 
				+        return places
			
 
				+
			
 
				+    def check_output(self, place):
			
 
				+        paddle.disable_static()
			
 
				+        pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
			
 
				+        pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
			
 
				+        actual_t = matched_rbox_iou(pd_rbox1, pd_rbox2).numpy()
			
 
				+        poly_rbox1 = self.rbox1
			
 
				+        poly_rbox2 = self.rbox2
			
 
				+        poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
			
 
				+        poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
			
 
				+        expect_t = matched_rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
			
 
				+        self.assertAllClose(
			
 
				+            actual_t,
			
 
				+            expect_t,
			
 
				+            msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
			
 
				+                str(place), str(expect_t), str(actual_t)))
			
 
				+
			
 
				+    def test_output(self):
			
 
				+        places = self.get_places()
			
 
				+        for place in places:
			
 
				+            self.check_output(place)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    unittest.main()
			
--- a/paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py
+++ b/paddlers/models/ppdet/ext_op/unittest/test_rbox_iou.py
@@ -0,0 +1,151 @@
 
				+import numpy as np
			
 
				+import sys
			
 
				+import time
			
 
				+from shapely.geometry import Polygon
			
 
				+import paddle
			
 
				+import unittest
			
 
				+
			
 
				+from ext_op import rbox_iou
			
 
				+
			
 
				+
			
 
				+def rbox2poly_single(rrect, get_best_begin_point=False):
			
 
				+    """
			
 
				+    rrect:[x_ctr,y_ctr,w,h,angle]
			
 
				+    to
			
 
				+    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
			
 
				+    """
			
 
				+    x_ctr, y_ctr, width, height, angle = rrect[:5]
			
 
				+    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
			
 
				+    # rect 2x4
			
 
				+    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
			
 
				+    R = np.array([[np.cos(angle), -np.sin(angle)],
			
 
				+                  [np.sin(angle), np.cos(angle)]])
			
 
				+    # poly
			
 
				+    poly = R.dot(rect)
			
 
				+    x0, x1, x2, x3 = poly[0, :4] + x_ctr
			
 
				+    y0, y1, y2, y3 = poly[1, :4] + y_ctr
			
 
				+    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float64)
			
 
				+    return poly
			
 
				+
			
 
				+
			
 
				+def intersection(g, p):
			
 
				+    """
			
 
				+    Intersection.
			
 
				+    """
			
 
				+
			
 
				+    g = g[:8].reshape((4, 2))
			
 
				+    p = p[:8].reshape((4, 2))
			
 
				+
			
 
				+    a = g
			
 
				+    b = p
			
 
				+
			
 
				+    use_filter = True
			
 
				+    if use_filter:
			
 
				+        # step1:
			
 
				+        inter_x1 = np.maximum(np.min(a[:, 0]), np.min(b[:, 0]))
			
 
				+        inter_x2 = np.minimum(np.max(a[:, 0]), np.max(b[:, 0]))
			
 
				+        inter_y1 = np.maximum(np.min(a[:, 1]), np.min(b[:, 1]))
			
 
				+        inter_y2 = np.minimum(np.max(a[:, 1]), np.max(b[:, 1]))
			
 
				+        if inter_x1 >= inter_x2 or inter_y1 >= inter_y2:
			
 
				+            return 0.
			
 
				+        x1 = np.minimum(np.min(a[:, 0]), np.min(b[:, 0]))
			
 
				+        x2 = np.maximum(np.max(a[:, 0]), np.max(b[:, 0]))
			
 
				+        y1 = np.minimum(np.min(a[:, 1]), np.min(b[:, 1]))
			
 
				+        y2 = np.maximum(np.max(a[:, 1]), np.max(b[:, 1]))
			
 
				+        if x1 >= x2 or y1 >= y2 or (x2 - x1) < 2 or (y2 - y1) < 2:
			
 
				+            return 0.
			
 
				+
			
 
				+    g = Polygon(g)
			
 
				+    p = Polygon(p)
			
 
				+    if not g.is_valid or not p.is_valid:
			
 
				+        return 0
			
 
				+
			
 
				+    inter = Polygon(g).intersection(Polygon(p)).area
			
 
				+    union = g.area + p.area - inter
			
 
				+    if union == 0:
			
 
				+        return 0
			
 
				+    else:
			
 
				+        return inter / union
			
 
				+
			
 
				+
			
 
				+def rbox_overlaps(anchors, gt_bboxes, use_cv2=False):
			
 
				+    """
			
 
				+
			
 
				+    Args:
			
 
				+        anchors: [NA, 5]  x1,y1,x2,y2,angle
			
 
				+        gt_bboxes: [M, 5]  x1,y1,x2,y2,angle
			
 
				+
			
 
				+    Returns:
			
 
				+        iou: [NA, M]
			
 
				+    """
			
 
				+    assert anchors.shape[1] == 5
			
 
				+    assert gt_bboxes.shape[1] == 5
			
 
				+
			
 
				+    gt_bboxes_ploy = [rbox2poly_single(e) for e in gt_bboxes]
			
 
				+    anchors_ploy = [rbox2poly_single(e) for e in anchors]
			
 
				+
			
 
				+    num_gt, num_anchors = len(gt_bboxes_ploy), len(anchors_ploy)
			
 
				+    iou = np.zeros((num_anchors, num_gt), dtype=np.float64)
			
 
				+
			
 
				+    start_time = time.time()
			
 
				+    for i in range(num_anchors):
			
 
				+        for j in range(num_gt):
			
 
				+            try:
			
 
				+                iou[i, j] = intersection(anchors_ploy[i], gt_bboxes_ploy[j])
			
 
				+            except Exception as e:
			
 
				+                print('cur anchors_ploy[i]', anchors_ploy[i],
			
 
				+                      'gt_bboxes_ploy[j]', gt_bboxes_ploy[j], e)
			
 
				+    return iou
			
 
				+
			
 
				+
			
 
				+def gen_sample(n):
			
 
				+    rbox = np.random.rand(n, 5)
			
 
				+    rbox[:, 0:4] = rbox[:, 0:4] * 0.45 + 0.001
			
 
				+    rbox[:, 4] = rbox[:, 4] - 0.5
			
 
				+    return rbox
			
 
				+
			
 
				+
			
 
				+class RBoxIoUTest(unittest.TestCase):
			
 
				+    def setUp(self):
			
 
				+        self.initTestCase()
			
 
				+        self.rbox1 = gen_sample(self.n)
			
 
				+        self.rbox2 = gen_sample(self.m)
			
 
				+
			
 
				+    def initTestCase(self):
			
 
				+        self.n = 13000
			
 
				+        self.m = 7
			
 
				+
			
 
				+    def assertAllClose(self, x, y, msg, atol=5e-1, rtol=1e-2):
			
 
				+        self.assertTrue(np.allclose(x, y, atol=atol, rtol=rtol), msg=msg)
			
 
				+
			
 
				+    def get_places(self):
			
 
				+        places = [paddle.CPUPlace()]
			
 
				+        if paddle.device.is_compiled_with_cuda():
			
 
				+            places.append(paddle.CUDAPlace(0))
			
 
				+
			
 
				+        return places
			
 
				+
			
 
				+    def check_output(self, place):
			
 
				+        paddle.disable_static()
			
 
				+        pd_rbox1 = paddle.to_tensor(self.rbox1, place=place)
			
 
				+        pd_rbox2 = paddle.to_tensor(self.rbox2, place=place)
			
 
				+        actual_t = rbox_iou(pd_rbox1, pd_rbox2).numpy()
			
 
				+        poly_rbox1 = self.rbox1
			
 
				+        poly_rbox2 = self.rbox2
			
 
				+        poly_rbox1[:, 0:4] = self.rbox1[:, 0:4] * 1024
			
 
				+        poly_rbox2[:, 0:4] = self.rbox2[:, 0:4] * 1024
			
 
				+        expect_t = rbox_overlaps(poly_rbox1, poly_rbox2, use_cv2=False)
			
 
				+        self.assertAllClose(
			
 
				+            actual_t,
			
 
				+            expect_t,
			
 
				+            msg="rbox_iou has diff at {} \nExpect {}\nBut got {}".format(
			
 
				+                str(place), str(expect_t), str(actual_t)))
			
 
				+
			
 
				+    def test_output(self):
			
 
				+        places = self.get_places()
			
 
				+        for place in places:
			
 
				+            self.check_output(place)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    unittest.main()
			
--- a/paddlers/models/ppdet/metrics/__init__.py
+++ b/paddlers/models/ppdet/metrics/__init__.py
@@ -26,4 +26,4 @@ __all__ = metrics.__all__ + mot_metrics.__all__
 
				 
			
 
				 from . import mcmot_metrics
			
 
				 from .mcmot_metrics import *
			
 
				-__all__ = metrics.__all__ + mcmot_metrics.__all__
			
 
				+__all__ = metrics.__all__ + mcmot_metrics.__all__
			
--- a/paddlers/models/ppdet/metrics/coco_utils.py
+++ b/paddlers/models/ppdet/metrics/coco_utils.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/metrics/json_results.py
+++ b/paddlers/models/ppdet/metrics/json_results.py
@@ -65,6 +65,14 @@ def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
 
				     return det_res
			
 
				 
			
 
				 
			
 
				+def strip_mask(mask):
			
 
				+    row = mask[0, 0, :]
			
 
				+    col = mask[0, :, 0]
			
 
				+    im_h = len(col) - np.count_nonzero(col == -1)
			
 
				+    im_w = len(row) - np.count_nonzero(row == -1)
			
 
				+    return mask[:, :im_h, :im_w]
			
 
				+
			
 
				+
			
 
				 def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
			
 
				     import pycocotools.mask as mask_util
			
 
				     seg_res = []
			
@@ -72,8 +80,10 @@ def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
 
				     for i in range(len(mask_nums)):
			
 
				         cur_image_id = int(image_id[i][0])
			
 
				         det_nums = mask_nums[i]
			
 
				+        mask_i = masks[k:k + det_nums]
			
 
				+        mask_i = strip_mask(mask_i)
			
 
				         for j in range(det_nums):
			
 
				-            mask = masks[k].astype(np.uint8)
			
 
				+            mask = mask_i[j].astype(np.uint8)
			
 
				             score = float(bboxes[k][1])
			
 
				             label = int(bboxes[k][0])
			
 
				             k = k + 1
			
--- a/paddlers/models/ppdet/metrics/keypoint_metrics.py
+++ b/paddlers/models/ppdet/metrics/keypoint_metrics.py
@@ -1,21 +1,22 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import os
			
 
				 import json
			
 
				 from collections import defaultdict, OrderedDict
			
 
				 import numpy as np
			
 
				+import paddle
			
 
				 from pycocotools.coco import COCO
			
 
				 from pycocotools.cocoeval import COCOeval
			
 
				 from ..modeling.keypoint_utils import oks_nms
			
@@ -70,15 +71,23 @@ class KeyPointTopDownCOCOEval(object):
 
				         self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
			
 
				                                   3] = kpts[:, :, 0:3]
			
 
				         self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[
			
 
				-            'center'].numpy()[:, 0:2]
			
 
				+            'center'].numpy()[:, 0:2] if isinstance(
			
 
				+                inputs['center'], paddle.Tensor) else inputs['center'][:, 0:2]
			
 
				         self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[
			
 
				-            'scale'].numpy()[:, 0:2]
			
 
				+            'scale'].numpy()[:, 0:2] if isinstance(
			
 
				+                inputs['scale'], paddle.Tensor) else inputs['scale'][:, 0:2]
			
 
				         self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
			
 
				-            inputs['scale'].numpy() * 200, 1)
			
 
				-        self.results['all_boxes'][self.idx:self.idx + num_images,
			
 
				-                                  5] = np.squeeze(inputs['score'].numpy())
			
 
				-        self.results['image_path'].extend(inputs['im_id'].numpy())
			
 
				-
			
 
				+            inputs['scale'].numpy() * 200,
			
 
				+            1) if isinstance(inputs['scale'], paddle.Tensor) else np.prod(
			
 
				+                inputs['scale'] * 200, 1)
			
 
				+        self.results['all_boxes'][
			
 
				+            self.idx:self.idx + num_images,
			
 
				+            5] = np.squeeze(inputs['score'].numpy()) if isinstance(
			
 
				+                inputs['score'], paddle.Tensor) else np.squeeze(inputs['score'])
			
 
				+        if isinstance(inputs['im_id'], paddle.Tensor):
			
 
				+            self.results['image_path'].extend(inputs['im_id'].numpy())
			
 
				+        else:
			
 
				+            self.results['image_path'].extend(inputs['im_id'])
			
 
				         self.idx += num_images
			
 
				 
			
 
				     def _write_coco_keypoint_results(self, keypoints):
			
--- a/paddlers/models/ppdet/metrics/map_utils.py
+++ b/paddlers/models/ppdet/metrics/map_utils.py
@@ -22,7 +22,7 @@ import sys
 
				 import numpy as np
			
 
				 import itertools
			
 
				 import paddle
			
 
				-from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np
			
 
				+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
			
 
				 
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger(__name__)
			
@@ -91,15 +91,13 @@ def jaccard_overlap(pred, gt, is_bbox_normalized=False):
 
				     return overlap
			
 
				 
			
 
				 
			
 
				-def calc_rbox_iou(pred, gt_rbox):
			
 
				+def calc_rbox_iou(pred, gt_poly):
			
 
				     """
			
 
				     calc iou between rotated bbox
			
 
				     """
			
 
				     # calc iou of bounding box for speedup
			
 
				-    pred = np.array(pred, np.float32).reshape(-1, 8)
			
 
				-    pred = pred.reshape(-1, 2)
			
 
				-    gt_poly = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0]
			
 
				-    gt_poly = gt_poly.reshape(-1, 2)
			
 
				+    pred = np.array(pred, np.float32).reshape(-1, 2)
			
 
				+    gt_poly = np.array(gt_poly, np.float32).reshape(-1, 2)
			
 
				     pred_rect = [
			
 
				         np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
			
 
				         np.max(pred[:, 1])
			
@@ -114,20 +112,15 @@ def calc_rbox_iou(pred, gt_rbox):
 
				         return iou
			
 
				 
			
 
				     # calc rbox iou
			
 
				-    pred = pred.reshape(-1, 8)
			
 
				-
			
 
				-    pred = np.array(pred, np.float32).reshape(-1, 8)
			
 
				-    pred_rbox = poly2rbox(pred)
			
 
				-    pred_rbox = pred_rbox.reshape(-1, 5)
			
 
				-    pred_rbox = pred_rbox.reshape(-1, 5)
			
 
				+    pred_rbox = poly2rbox_np(pred.reshape(-1, 8)).reshape(-1, 5)
			
 
				+    gt_rbox = poly2rbox_np(gt_poly.reshape(-1, 8)).reshape(-1, 5)
			
 
				     try:
			
 
				-        from rbox_iou_ops import rbox_iou
			
 
				+        from ext_op import rbox_iou
			
 
				     except Exception as e:
			
 
				-        print("import custom_ops error, try install rbox_iou_ops " \
			
 
				+        print("import custom_ops error, try install ext_op " \
			
 
				                   "following ppdet/ext_op/README.md", e)
			
 
				         sys.stdout.flush()
			
 
				         sys.exit(-1)
			
 
				-    gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
			
 
				     pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
			
 
				     pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
			
 
				     iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
			
@@ -138,8 +131,7 @@ def calc_rbox_iou(pred, gt_rbox):
 
				 def prune_zero_padding(gt_box, gt_label, difficult=None):
			
 
				     valid_cnt = 0
			
 
				     for i in range(len(gt_box)):
			
 
				-        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
			
 
				-                gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
			
 
				+        if (gt_box[i] == 0).all():
			
 
				             break
			
 
				         valid_cnt += 1
			
 
				     return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
			
@@ -154,8 +146,8 @@ class DetectionMAP(object):
 
				     Args:
			
 
				         class_num (int): The class number.
			
 
				         overlap_thresh (float): The threshold of overlap
			
 
				-            ratio between prediction bounding box and
			
 
				-            ground truth bounding box for deciding
			
 
				+            ratio between prediction bounding box and 
			
 
				+            ground truth bounding box for deciding 
			
 
				             true/false positive. Default 0.5.
			
 
				         map_type (str): Calculation method of mean average
			
 
				             precision, currently support '11point' and
			
@@ -212,7 +204,7 @@ class DetectionMAP(object):
 
				             max_overlap = -1.0
			
 
				             for i, gl in enumerate(gt_label):
			
 
				                 if int(gl) == int(l):
			
 
				-                    if len(gt_box[i]) == 5:
			
 
				+                    if len(gt_box[i]) == 8:
			
 
				                         overlap = calc_rbox_iou(pred, gt_box[i])
			
 
				                     else:
			
 
				                         overlap = jaccard_overlap(pred, gt_box[i],
			
@@ -363,7 +355,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
 
				     """
			
 
				     Computes the average precision, given the recall and precision curves.
			
 
				     Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
			
 
				-
			
 
				+    
			
 
				     Args:
			
 
				         tp (list): True positives.
			
 
				         conf (list): Objectness value from 0-1.
			
@@ -417,7 +409,7 @@ def compute_ap(recall, precision):
 
				     """
			
 
				     Computes the average precision, given the recall and precision curves.
			
 
				     Code originally from https://github.com/rbgirshick/py-faster-rcnn.
			
 
				-
			
 
				+    
			
 
				     Args:
			
 
				         recall (list): The recall curve.
			
 
				         precision (list): The precision curve.
			
--- a/paddlers/models/ppdet/metrics/mcmot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mcmot_metrics.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -21,18 +21,21 @@ import copy
 
				 import sys
			
 
				 import math
			
 
				 from collections import defaultdict
			
 
				-from motmetrics.math_util import quiet_divide
			
 
				 
			
 
				 import numpy as np
			
 
				 import pandas as pd
			
 
				 
			
 
				-import paddle
			
 
				-import paddle.nn.functional as F
			
 
				 from .metrics import Metric
			
 
				-import motmetrics as mm
			
 
				-import openpyxl
			
 
				-metrics = mm.metrics.motchallenge_metrics
			
 
				-mh = mm.metrics.create()
			
 
				+try:
			
 
				+    import motmetrics as mm
			
 
				+    from motmetrics.math_util import quiet_divide
			
 
				+    metrics = mm.metrics.motchallenge_metrics
			
 
				+    mh = mm.metrics.create()
			
 
				+except:
			
 
				+    print(
			
 
				+        'Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
			
 
				+    )
			
 
				+    pass
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger(__name__)
			
 
				 
			
@@ -78,7 +81,7 @@ NAME_MAP = {
 
				 
			
 
				 def parse_accs_metrics(seq_acc, index_name, verbose=False):
			
 
				     """
			
 
				-    Parse the evaluation indicators of multiple MOTAccumulator
			
 
				+    Parse the evaluation indicators of multiple MOTAccumulator 
			
 
				     """
			
 
				     mh = mm.metrics.create()
			
 
				     summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
			
@@ -302,24 +305,30 @@ class MCMOTEvaluator(object):
 
				         self.num_classes = num_classes
			
 
				 
			
 
				         self.load_annotations()
			
 
				+        try:
			
 
				+            import motmetrics as mm
			
 
				+            mm.lap.default_solver = 'lap'
			
 
				+        except Exception as e:
			
 
				+            raise RuntimeError(
			
 
				+                'Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
			
 
				+            )
			
 
				         self.reset_accumulator()
			
 
				 
			
 
				         self.class_accs = []
			
 
				 
			
 
				     def load_annotations(self):
			
 
				         assert self.data_type == 'mcmot'
			
 
				-        self.gt_filename = os.path.join(self.data_root, '../', '../',
			
 
				-                                        'sequences',
			
 
				+        self.gt_filename = os.path.join(self.data_root, '../', 'sequences',
			
 
				                                         '{}.txt'.format(self.seq_name))
			
 
				+        if not os.path.exists(self.gt_filename):
			
 
				+            logger.warning(
			
 
				+                "gt_filename '{}' of MCMOTEvaluator is not exist, so the MOTA will be -INF."
			
 
				+            )
			
 
				 
			
 
				     def reset_accumulator(self):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				         self.acc = mm.MOTAccumulator(auto_id=True)
			
 
				 
			
 
				     def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False, union=False):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				         if union:
			
 
				             trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
			
 
				             gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
			
@@ -393,9 +402,6 @@ class MCMOTEvaluator(object):
 
				                     names,
			
 
				                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
			
 
				                              'precision', 'recall')):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				-
			
 
				         names = copy.deepcopy(names)
			
 
				         if metrics is None:
			
 
				             metrics = mm.metrics.motchallenge_metrics
			
--- a/paddlers/models/ppdet/metrics/metrics.py
+++ b/paddlers/models/ppdet/metrics/metrics.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -22,11 +22,14 @@ import json
 
				 import paddle
			
 
				 import numpy as np
			
 
				 import typing
			
 
				+from collections import defaultdict
			
 
				+from pathlib import Path
			
 
				 
			
 
				 from .map_utils import prune_zero_padding, DetectionMAP
			
 
				 from .coco_utils import get_infer_results, cocoapi_eval
			
 
				 from .widerface_utils import face_eval_run
			
 
				 from paddlers.models.ppdet.data.source.category import get_categories
			
 
				+from paddlers.models.ppdet.modeling.rbox_utils import poly2rbox_np
			
 
				 
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger(__name__)
			
@@ -69,8 +72,6 @@ class Metric(paddle.metric.Metric):
 
				 
			
 
				 class COCOMetric(Metric):
			
 
				     def __init__(self, anno_file, **kwargs):
			
 
				-        assert os.path.isfile(anno_file), \
			
 
				-                "anno_file {} not a file".format(anno_file)
			
 
				         self.anno_file = anno_file
			
 
				         self.clsid2catid = kwargs.get('clsid2catid', None)
			
 
				         if self.clsid2catid is None:
			
@@ -81,6 +82,14 @@ class COCOMetric(Metric):
 
				         self.bias = kwargs.get('bias', 0)
			
 
				         self.save_prediction_only = kwargs.get('save_prediction_only', False)
			
 
				         self.iou_type = kwargs.get('IouType', 'bbox')
			
 
				+
			
 
				+        if not self.save_prediction_only:
			
 
				+            assert os.path.isfile(anno_file), \
			
 
				+                    "anno_file {} not a file".format(anno_file)
			
 
				+
			
 
				+        if self.output_eval is not None:
			
 
				+            Path(self.output_eval).mkdir(exist_ok=True)
			
 
				+
			
 
				         self.reset()
			
 
				 
			
 
				     def reset(self):
			
@@ -218,7 +227,9 @@ class VOCMetric(Metric):
 
				                  map_type='11point',
			
 
				                  is_bbox_normalized=False,
			
 
				                  evaluate_difficult=False,
			
 
				-                 classwise=False):
			
 
				+                 classwise=False,
			
 
				+                 output_eval=None,
			
 
				+                 save_prediction_only=False):
			
 
				         assert os.path.isfile(label_list), \
			
 
				                 "label_list {} not a file".format(label_list)
			
 
				         self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
			
@@ -226,6 +237,8 @@ class VOCMetric(Metric):
 
				         self.overlap_thresh = overlap_thresh
			
 
				         self.map_type = map_type
			
 
				         self.evaluate_difficult = evaluate_difficult
			
 
				+        self.output_eval = output_eval
			
 
				+        self.save_prediction_only = save_prediction_only
			
 
				         self.detection_map = DetectionMAP(
			
 
				             class_num=class_num,
			
 
				             overlap_thresh=overlap_thresh,
			
@@ -238,34 +251,52 @@ class VOCMetric(Metric):
 
				         self.reset()
			
 
				 
			
 
				     def reset(self):
			
 
				+        self.results = {'bbox': [], 'score': [], 'label': []}
			
 
				         self.detection_map.reset()
			
 
				 
			
 
				     def update(self, inputs, outputs):
			
 
				-        bbox_np = outputs['bbox'].numpy()
			
 
				+        bbox_np = outputs['bbox'].numpy() if isinstance(
			
 
				+            outputs['bbox'], paddle.Tensor) else outputs['bbox']
			
 
				         bboxes = bbox_np[:, 2:]
			
 
				         scores = bbox_np[:, 1]
			
 
				         labels = bbox_np[:, 0]
			
 
				-        bbox_lengths = outputs['bbox_num'].numpy()
			
 
				+        bbox_lengths = outputs['bbox_num'].numpy() if isinstance(
			
 
				+            outputs['bbox_num'], paddle.Tensor) else outputs['bbox_num']
			
 
				+
			
 
				+        self.results['bbox'].append(bboxes.tolist())
			
 
				+        self.results['score'].append(scores.tolist())
			
 
				+        self.results['label'].append(labels.tolist())
			
 
				 
			
 
				         if bboxes.shape == (1, 1) or bboxes is None:
			
 
				             return
			
 
				+        if self.save_prediction_only:
			
 
				+            return
			
 
				+
			
 
				         gt_boxes = inputs['gt_bbox']
			
 
				         gt_labels = inputs['gt_class']
			
 
				         difficults = inputs['difficult'] if not self.evaluate_difficult \
			
 
				                             else None
			
 
				 
			
 
				-        scale_factor = inputs['scale_factor'].numpy(
			
 
				-        ) if 'scale_factor' in inputs else np.ones(
			
 
				-            (gt_boxes.shape[0], 2)).astype('float32')
			
 
				+        if 'scale_factor' in inputs:
			
 
				+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
			
 
				+                inputs['scale_factor'],
			
 
				+                paddle.Tensor) else inputs['scale_factor']
			
 
				+        else:
			
 
				+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
			
 
				 
			
 
				         bbox_idx = 0
			
 
				         for i in range(len(gt_boxes)):
			
 
				-            gt_box = gt_boxes[i].numpy()
			
 
				+            gt_box = gt_boxes[i].numpy() if isinstance(
			
 
				+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
			
 
				             h, w = scale_factor[i]
			
 
				             gt_box = gt_box / np.array([w, h, w, h])
			
 
				-            gt_label = gt_labels[i].numpy()
			
 
				-            difficult = None if difficults is None \
			
 
				-                            else difficults[i].numpy()
			
 
				+            gt_label = gt_labels[i].numpy() if isinstance(
			
 
				+                gt_labels[i], paddle.Tensor) else gt_labels[i]
			
 
				+            if difficults is not None:
			
 
				+                difficult = difficults[i].numpy() if isinstance(
			
 
				+                    difficults[i], paddle.Tensor) else difficults[i]
			
 
				+            else:
			
 
				+                difficult = None
			
 
				             bbox_num = bbox_lengths[i]
			
 
				             bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
			
 
				             score = scores[bbox_idx:bbox_idx + bbox_num]
			
@@ -277,6 +308,15 @@ class VOCMetric(Metric):
 
				             bbox_idx += bbox_num
			
 
				 
			
 
				     def accumulate(self):
			
 
				+        output = "bbox.json"
			
 
				+        if self.output_eval:
			
 
				+            output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.results, f)
			
 
				+                logger.info('The bbox result is saved to bbox.json.')
			
 
				+        if self.save_prediction_only:
			
 
				+            return
			
 
				+
			
 
				         logger.info("Accumulating evaluatation results...")
			
 
				         self.detection_map.accumulate()
			
 
				 
			
@@ -309,25 +349,16 @@ class WiderFaceMetric(Metric):
 
				 
			
 
				 class RBoxMetric(Metric):
			
 
				     def __init__(self, anno_file, **kwargs):
			
 
				-        assert os.path.isfile(anno_file), \
			
 
				-                "anno_file {} not a file".format(anno_file)
			
 
				-        assert os.path.exists(anno_file), "anno_file {} not exists".format(
			
 
				-            anno_file)
			
 
				         self.anno_file = anno_file
			
 
				-        self.gt_anno = json.load(open(self.anno_file))
			
 
				-        cats = self.gt_anno['categories']
			
 
				-        self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
			
 
				-        self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
			
 
				-        self.catid2name = {cat['id']: cat['name'] for cat in cats}
			
 
				+        self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
			
 
				+        self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
			
 
				         self.classwise = kwargs.get('classwise', False)
			
 
				         self.output_eval = kwargs.get('output_eval', None)
			
 
				-        # TODO: bias should be unified
			
 
				-        self.bias = kwargs.get('bias', 0)
			
 
				         self.save_prediction_only = kwargs.get('save_prediction_only', False)
			
 
				-        self.iou_type = kwargs.get('IouType', 'bbox')
			
 
				         self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
			
 
				         self.map_type = kwargs.get('map_type', '11point')
			
 
				         self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
			
 
				+        self.imid2path = kwargs.get('imid2path', None)
			
 
				         class_num = len(self.catid2name)
			
 
				         self.detection_map = DetectionMAP(
			
 
				             class_num=class_num,
			
@@ -341,7 +372,7 @@ class RBoxMetric(Metric):
 
				         self.reset()
			
 
				 
			
 
				     def reset(self):
			
 
				-        self.result_bbox = []
			
 
				+        self.results = []
			
 
				         self.detection_map.reset()
			
 
				 
			
 
				     def update(self, inputs, outputs):
			
@@ -351,43 +382,83 @@ class RBoxMetric(Metric):
 
				             outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
			
 
				 
			
 
				         im_id = inputs['im_id']
			
 
				-        outs['im_id'] = im_id.numpy() if isinstance(im_id,
			
 
				-                                                    paddle.Tensor) else im_id
			
 
				+        im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
			
 
				+        outs['im_id'] = im_id
			
 
				 
			
 
				-        infer_results = get_infer_results(
			
 
				-            outs, self.clsid2catid, bias=self.bias)
			
 
				-        self.result_bbox += infer_results[
			
 
				-            'bbox'] if 'bbox' in infer_results else []
			
 
				-        bbox = [b['bbox'] for b in self.result_bbox]
			
 
				-        score = [b['score'] for b in self.result_bbox]
			
 
				-        label = [b['category_id'] for b in self.result_bbox]
			
 
				-        label = [self.catid2clsid[e] for e in label]
			
 
				-        gt_box = [
			
 
				-            e['bbox'] for e in self.gt_anno['annotations']
			
 
				-            if e['image_id'] == outs['im_id']
			
 
				-        ]
			
 
				-        gt_label = [
			
 
				-            e['category_id'] for e in self.gt_anno['annotations']
			
 
				-            if e['image_id'] == outs['im_id']
			
 
				-        ]
			
 
				-        gt_label = [self.catid2clsid[e] for e in gt_label]
			
 
				-        self.detection_map.update(bbox, score, label, gt_box, gt_label)
			
 
				+        infer_results = get_infer_results(outs, self.clsid2catid)
			
 
				+        infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
			
 
				+        self.results += infer_results
			
 
				+        if self.save_prediction_only:
			
 
				+            return
			
 
				 
			
 
				-    def accumulate(self):
			
 
				-        if len(self.result_bbox) > 0:
			
 
				-            output = "bbox.json"
			
 
				-            if self.output_eval:
			
 
				-                output = os.path.join(self.output_eval, output)
			
 
				+        gt_boxes = inputs['gt_poly']
			
 
				+        gt_labels = inputs['gt_class']
			
 
				+
			
 
				+        if 'scale_factor' in inputs:
			
 
				+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
			
 
				+                inputs['scale_factor'],
			
 
				+                paddle.Tensor) else inputs['scale_factor']
			
 
				+        else:
			
 
				+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
			
 
				+
			
 
				+        for i in range(len(gt_boxes)):
			
 
				+            gt_box = gt_boxes[i].numpy() if isinstance(
			
 
				+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
			
 
				+            h, w = scale_factor[i]
			
 
				+            gt_box = gt_box / np.array([w, h, w, h, w, h, w, h])
			
 
				+            gt_label = gt_labels[i].numpy() if isinstance(
			
 
				+                gt_labels[i], paddle.Tensor) else gt_labels[i]
			
 
				+            gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
			
 
				+            bbox = [
			
 
				+                res['bbox'] for res in infer_results
			
 
				+                if int(res['image_id']) == int(im_id[i])
			
 
				+            ]
			
 
				+            score = [
			
 
				+                res['score'] for res in infer_results
			
 
				+                if int(res['image_id']) == int(im_id[i])
			
 
				+            ]
			
 
				+            label = [
			
 
				+                self.catid2clsid[int(res['category_id'])]
			
 
				+                for res in infer_results
			
 
				+                if int(res['image_id']) == int(im_id[i])
			
 
				+            ]
			
 
				+            self.detection_map.update(bbox, score, label, gt_box, gt_label)
			
 
				+
			
 
				+    def save_results(self, results, output_dir, imid2path):
			
 
				+        if imid2path:
			
 
				+            data_dicts = defaultdict(list)
			
 
				+            for result in results:
			
 
				+                image_id = result['image_id']
			
 
				+                data_dicts[image_id].append(result)
			
 
				+
			
 
				+            for image_id, image_path in imid2path.items():
			
 
				+                basename = os.path.splitext(os.path.split(image_path)[-1])[0]
			
 
				+                output = os.path.join(output_dir, "{}.txt".format(basename))
			
 
				+                dets = data_dicts.get(image_id, [])
			
 
				+                with open(output, 'w') as f:
			
 
				+                    for det in dets:
			
 
				+                        catid, bbox, score = det['category_id'], det[
			
 
				+                            'bbox'], det['score']
			
 
				+                        bbox_pred = '{} {} '.format(self.catid2name[catid],
			
 
				+                                                    score) + ' '.join(
			
 
				+                                                        [str(e) for e in bbox])
			
 
				+                        f.write(bbox_pred + '\n')
			
 
				+
			
 
				+            logger.info('The bbox result is saved to {}.'.format(output_dir))
			
 
				+        else:
			
 
				+            output = os.path.join(output_dir, "bbox.json")
			
 
				             with open(output, 'w') as f:
			
 
				-                json.dump(self.result_bbox, f)
			
 
				-                logger.info('The bbox result is saved to bbox.json.')
			
 
				+                json.dump(results, f)
			
 
				 
			
 
				-            if self.save_prediction_only:
			
 
				-                logger.info('The bbox result is saved to {} and do not '
			
 
				-                            'evaluate the mAP.'.format(output))
			
 
				-            else:
			
 
				-                logger.info("Accumulating evaluatation results...")
			
 
				-                self.detection_map.accumulate()
			
 
				+            logger.info('The bbox result is saved to {}.'.format(output))
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        if self.output_eval:
			
 
				+            self.save_results(self.results, self.output_eval, self.imid2path)
			
 
				+
			
 
				+        if not self.save_prediction_only:
			
 
				+            logger.info("Accumulating evaluatation results...")
			
 
				+            self.detection_map.accumulate()
			
 
				 
			
 
				     def log(self):
			
 
				         map_stat = 100. * self.detection_map.get_map()
			
--- a/paddlers/models/ppdet/metrics/mot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mot_metrics.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -22,13 +22,21 @@ import sys
 
				 import math
			
 
				 from collections import defaultdict
			
 
				 import numpy as np
			
 
				-import paddle
			
 
				-import paddle.nn.functional as F
			
 
				+
			
 
				 from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
			
 
				 from .map_utils import ap_per_class
			
 
				 from .metrics import Metric
			
 
				 from .munkres import Munkres
			
 
				 
			
 
				+try:
			
 
				+    import motmetrics as mm
			
 
				+    mm.lap.default_solver = 'lap'
			
 
				+except:
			
 
				+    print(
			
 
				+        'Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
			
 
				+    )
			
 
				+    pass
			
 
				+
			
 
				 from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				 logger = setup_logger(__name__)
			
 
				 
			
@@ -36,8 +44,13 @@ __all__ = ['MOTEvaluator', 'MOTMetric', 'JDEDetMetric', 'KITTIMOTMetric']
 
				 
			
 
				 
			
 
				 def read_mot_results(filename, is_gt=False, is_ignore=False):
			
 
				-    valid_labels = {1}
			
 
				-    ignore_labels = {2, 7, 8, 12}  # only in motchallenge datasets like 'MOT16'
			
 
				+    valid_label = [1]
			
 
				+    ignore_labels = [2, 7, 8, 12]  # only in motchallenge datasets like 'MOT16'
			
 
				+    if is_gt:
			
 
				+        logger.info(
			
 
				+            "In MOT16/17 dataset the valid_label of ground truth is '{}', "
			
 
				+            "in other dataset it should be '0' for single classs MOT.".format(
			
 
				+                valid_label[0]))
			
 
				     results_dict = dict()
			
 
				     if os.path.isfile(filename):
			
 
				         with open(filename, 'r') as f:
			
@@ -50,12 +63,10 @@ def read_mot_results(filename, is_gt=False, is_ignore=False):
 
				                     continue
			
 
				                 results_dict.setdefault(fid, list())
			
 
				 
			
 
				-                box_size = float(linelist[4]) * float(linelist[5])
			
 
				-
			
 
				                 if is_gt:
			
 
				                     label = int(float(linelist[7]))
			
 
				                     mark = int(float(linelist[6]))
			
 
				-                    if mark == 0 or label not in valid_labels:
			
 
				+                    if mark == 0 or label not in valid_label:
			
 
				                         continue
			
 
				                     score = 1
			
 
				                 elif is_ignore:
			
@@ -112,24 +123,31 @@ class MOTEvaluator(object):
 
				         self.data_type = data_type
			
 
				 
			
 
				         self.load_annotations()
			
 
				+        try:
			
 
				+            import motmetrics as mm
			
 
				+            mm.lap.default_solver = 'lap'
			
 
				+        except Exception as e:
			
 
				+            raise RuntimeError(
			
 
				+                'Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
			
 
				+            )
			
 
				         self.reset_accumulator()
			
 
				 
			
 
				     def load_annotations(self):
			
 
				         assert self.data_type == 'mot'
			
 
				         gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
			
 
				                                    'gt.txt')
			
 
				+        if not os.path.exists(gt_filename):
			
 
				+            logger.warning(
			
 
				+                "gt_filename '{}' of MOTEvaluator is not exist, so the MOTA will be -INF."
			
 
				+            )
			
 
				         self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
			
 
				         self.gt_ignore_frame_dict = read_mot_results(
			
 
				             gt_filename, is_ignore=True)
			
 
				 
			
 
				     def reset_accumulator(self):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				         self.acc = mm.MOTAccumulator(auto_id=True)
			
 
				 
			
 
				     def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				         # results
			
 
				         trk_tlwhs = np.copy(trk_tlwhs)
			
 
				         trk_ids = np.copy(trk_ids)
			
@@ -187,8 +205,6 @@ class MOTEvaluator(object):
 
				                     names,
			
 
				                     metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
			
 
				                              'precision', 'recall')):
			
 
				-        import motmetrics as mm
			
 
				-        mm.lap.default_solver = 'lap'
			
 
				         names = copy.deepcopy(names)
			
 
				         if metrics is None:
			
 
				             metrics = mm.metrics.motchallenge_metrics
			
@@ -225,8 +241,6 @@ class MOTMetric(Metric):
 
				         self.result_root = result_root
			
 
				 
			
 
				     def accumulate(self):
			
 
				-        import motmetrics as mm
			
 
				-        import openpyxl
			
 
				         metrics = mm.metrics.motchallenge_metrics
			
 
				         mh = mm.metrics.create()
			
 
				         summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
			
@@ -422,7 +436,7 @@ class KITTIEvaluation(object):
 
				         self.ifn = 0  # number of ignored false negatives
			
 
				         self.ifns = []  # number of ignored false negatives PER SEQUENCE
			
 
				         self.fp = 0  # number of false positives
			
 
				-        # a bit tricky, the number of ignored false negatives and ignored true positives
			
 
				+        # a bit tricky, the number of ignored false negatives and ignored true positives 
			
 
				         # is subtracted, but if both tracker detection and ground truth detection
			
 
				         # are ignored this number is added again to avoid double counting
			
 
				         self.fps = []  # above PER SEQUENCE
			
@@ -551,7 +565,7 @@ class KITTIEvaluation(object):
 
				                             "track ids are not unique for sequence %d: frame %d"
			
 
				                             % (seq, t_data.frame))
			
 
				                         logger.info(
			
 
				-                            "track id %d occured at least twice for this frame"
			
 
				+                            "track id %d occurred at least twice for this frame"
			
 
				                             % t_data.track_id)
			
 
				                         logger.info("Exiting...")
			
 
				                         #continue # this allows to evaluate non-unique result files
			
--- a/paddlers/models/ppdet/metrics/munkres.py
+++ b/paddlers/models/ppdet/metrics/munkres.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 """
			
 
				 This code is borrow from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
			
--- a/paddlers/models/ppdet/model_zoo/.gitignore
+++ b/paddlers/models/ppdet/model_zoo/.gitignore
@@ -0,0 +1 @@
 
				+MODEL_ZOO
			
--- a/paddlers/models/ppdet/model_zoo/__init__.py
+++ b/paddlers/models/ppdet/model_zoo/__init__.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from . import model_zoo
			
--- a/paddlers/models/ppdet/model_zoo/model_zoo.py
+++ b/paddlers/models/ppdet/model_zoo/model_zoo.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import os.path as osp
			
--- a/paddlers/models/ppdet/model_zoo/tests/__init__.py
+++ b/paddlers/models/ppdet/model_zoo/tests/__init__.py
@@ -0,0 +1,13 @@
 
				+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
--- a/paddlers/models/ppdet/model_zoo/tests/test_get_model.py
+++ b/paddlers/models/ppdet/model_zoo/tests/test_get_model.py
@@ -0,0 +1,48 @@
 
				+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import paddle
			
 
				+import paddlers.models.ppdet as ppdet
			
 
				+import unittest
			
 
				+
			
 
				+# NOTE: weights downloading costs time, we choose
			
 
				+#       a small model for unittesting
			
 
				+MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
			
 
				+
			
 
				+
			
 
				+class TestGetConfigFile(unittest.TestCase):
			
 
				+    def test_main(self):
			
 
				+        try:
			
 
				+            cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
			
 
				+            assert os.path.isfile(cfg_file)
			
 
				+        except:
			
 
				+            self.assertTrue(False)
			
 
				+
			
 
				+
			
 
				+class TestGetModel(unittest.TestCase):
			
 
				+    def test_main(self):
			
 
				+        try:
			
 
				+            model = ppdet.model_zoo.get_model(MODEL_NAME)
			
 
				+            assert isinstance(model, paddle.nn.Layer)
			
 
				+        except:
			
 
				+            self.assertTrue(False)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    unittest.main()
			
--- a/paddlers/models/ppdet/model_zoo/tests/test_list_model.py
+++ b/paddlers/models/ppdet/model_zoo/tests/test_list_model.py
@@ -0,0 +1,68 @@
 
				+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import unittest
			
 
				+import paddlers.models.ppdet as ppdet
			
 
				+
			
 
				+
			
 
				+class TestListModel(unittest.TestCase):
			
 
				+    def setUp(self):
			
 
				+        self._filter = []
			
 
				+
			
 
				+    def test_main(self):
			
 
				+        try:
			
 
				+            ppdet.model_zoo.list_model(self._filter)
			
 
				+            self.assertTrue(True)
			
 
				+        except:
			
 
				+            self.assertTrue(False)
			
 
				+
			
 
				+
			
 
				+class TestListModelYOLO(TestListModel):
			
 
				+    def setUp(self):
			
 
				+        self._filter = ['yolo']
			
 
				+
			
 
				+
			
 
				+class TestListModelRCNN(TestListModel):
			
 
				+    def setUp(self):
			
 
				+        self._filter = ['rcnn']
			
 
				+
			
 
				+
			
 
				+class TestListModelSSD(TestListModel):
			
 
				+    def setUp(self):
			
 
				+        self._filter = ['ssd']
			
 
				+
			
 
				+
			
 
				+class TestListModelMultiFilter(TestListModel):
			
 
				+    def setUp(self):
			
 
				+        self._filter = ['yolo', 'darknet']
			
 
				+
			
 
				+
			
 
				+class TestListModelError(unittest.TestCase):
			
 
				+    def setUp(self):
			
 
				+        self._filter = ['xxx']
			
 
				+
			
 
				+    def test_main(self):
			
 
				+        try:
			
 
				+            ppdet.model_zoo.list_model(self._filter)
			
 
				+            self.assertTrue(False)
			
 
				+        except ValueError:
			
 
				+            self.assertTrue(True)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    unittest.main()
			
--- a/paddlers/models/ppdet/modeling/__init__.py
+++ b/paddlers/models/ppdet/modeling/__init__.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import warnings
			
@@ -29,6 +29,7 @@ from . import reid
 
				 from . import mot
			
 
				 from . import transformers
			
 
				 from . import assigners
			
 
				+from . import rbox_utils
			
 
				 
			
 
				 from .ops import *
			
 
				 from .backbones import *
			
@@ -43,3 +44,4 @@ from .reid import *
 
				 from .mot import *
			
 
				 from .transformers import *
			
 
				 from .assigners import *
			
 
				+from .rbox_utils import *
			
--- a/paddlers/models/ppdet/modeling/architectures/__init__.py
+++ b/paddlers/models/ppdet/modeling/architectures/__init__.py
@@ -1,10 +1,17 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				+# limitations under the License.
			
 
				+
			
 
				 from . import meta_arch
			
 
				 from . import faster_rcnn
			
 
				 from . import mask_rcnn
			
@@ -26,6 +33,9 @@ from . import picodet
 
				 from . import detr
			
 
				 from . import sparse_rcnn
			
 
				 from . import tood
			
 
				+from . import retinanet
			
 
				+from . import bytetrack
			
 
				+from . import yolox
			
 
				 
			
 
				 from .meta_arch import *
			
 
				 from .faster_rcnn import *
			
@@ -49,3 +59,6 @@ from .picodet import *
 
				 from .detr import *
			
 
				 from .sparse_rcnn import *
			
 
				 from .tood import *
			
 
				+from .retinanet import *
			
 
				+from .bytetrack import *
			
 
				+from .yolox import *
			
--- a/paddlers/models/ppdet/modeling/architectures/bytetrack.py
+++ b/paddlers/models/ppdet/modeling/architectures/bytetrack.py
@@ -0,0 +1,79 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['ByteTrack']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class ByteTrack(BaseArch):
			
 
				+    """
			
 
				+    ByteTrack network, see https://arxiv.org/abs/2110.06864
			
 
				+
			
 
				+    Args:
			
 
				+        detector (object): detector model instance
			
 
				+        reid (object): reid model instance, default None
			
 
				+        tracker (object): tracker instance
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, detector='YOLOX', reid=None, tracker='JDETracker'):
			
 
				+        super(ByteTrack, self).__init__()
			
 
				+        self.detector = detector
			
 
				+        self.reid = reid
			
 
				+        self.tracker = tracker
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        detector = create(cfg['detector'])
			
 
				+
			
 
				+        if cfg['reid'] != 'None':
			
 
				+            reid = create(cfg['reid'])
			
 
				+        else:
			
 
				+            reid = None
			
 
				+
			
 
				+        tracker = create(cfg['tracker'])
			
 
				+
			
 
				+        return {
			
 
				+            "detector": detector,
			
 
				+            "reid": reid,
			
 
				+            "tracker": tracker,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        det_outs = self.detector(self.inputs)
			
 
				+
			
 
				+        if self.training:
			
 
				+            return det_outs
			
 
				+        else:
			
 
				+            if self.reid is not None:
			
 
				+                assert 'crops' in self.inputs
			
 
				+                crops = self.inputs['crops']
			
 
				+                pred_embs = self.reid(crops)
			
 
				+            else:
			
 
				+                pred_embs = None
			
 
				+            det_outs['embeddings'] = pred_embs
			
 
				+            return det_outs
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -111,14 +111,14 @@ class CascadeRCNN(BaseArch):
 
				             bbox, bbox_num = self.bbox_post_process(
			
 
				                 preds, (refined_rois, rois_num), im_shape, scale_factor)
			
 
				             # rescale the prediction back to origin image
			
 
				-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				-                                                        im_shape, scale_factor)
			
 
				+            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
			
 
				+                bbox, bbox_num, im_shape, scale_factor)
			
 
				             if not self.with_mask:
			
 
				                 return bbox_pred, bbox_num, None
			
 
				             mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
			
 
				             origin_shape = self.bbox_post_process.get_origin_shape()
			
 
				-            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
			
 
				-                                               bbox_num, origin_shape)
			
 
				+            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
			
 
				+                                               origin_shape)
			
 
				             return bbox_pred, bbox_num, mask_pred
			
 
				 
			
 
				     def get_loss(self, ):
			
--- a/paddlers/models/ppdet/modeling/architectures/centernet.py
+++ b/paddlers/models/ppdet/modeling/architectures/centernet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/deepsort.py
+++ b/paddlers/models/ppdet/modeling/architectures/deepsort.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -62,8 +62,9 @@ class DeepSORT(BaseArch):
 
				 
			
 
				     def _forward(self):
			
 
				         crops = self.inputs['crops']
			
 
				-        features = self.reid(crops)
			
 
				-        return features
			
 
				+        outs = {}
			
 
				+        outs['embeddings'] = self.reid(crops)
			
 
				+        return outs
			
 
				 
			
 
				     def get_pred(self):
			
 
				         return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/fairmot.py
+++ b/paddlers/models/ppdet/modeling/architectures/fairmot.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -87,8 +87,8 @@ class FasterRCNN(BaseArch):
 
				                                                     im_shape, scale_factor)
			
 
				 
			
 
				             # rescale the prediction back to origin image
			
 
				-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				-                                                        im_shape, scale_factor)
			
 
				+            bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
			
 
				+                bbox, bbox_num, im_shape, scale_factor)
			
 
				             return bbox_pred, bbox_num
			
 
				 
			
 
				     def get_loss(self, ):
			
--- a/paddlers/models/ppdet/modeling/architectures/fcos.py
+++ b/paddlers/models/ppdet/modeling/architectures/fcos.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/gfl.py
+++ b/paddlers/models/ppdet/modeling/architectures/gfl.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/jde.py
+++ b/paddlers/models/ppdet/modeling/architectures/jde.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -153,7 +153,7 @@ class HrHRNetPostProcess(object):
 
				         heat_thresh (float): value of topk below this threshhold will be ignored
			
 
				         tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
			
 
				 
			
 
				-        inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
			
 
				+        inputs(list[heatmap]): the output list of model, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
			
 
				         original_height, original_width (float): the original image size
			
 
				     '''
			
 
				 
			
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				+# you may not use this file except in compliance with the License. 
			
 
				+# You may obtain a copy of the License at 
			
 
				 #
			
 
				 #     http://www.apache.org/licenses/LICENSE-2.0
			
 
				 #
			
 
				 # Unless required by applicable law or agreed to in writing, software
			
 
				 # distributed under the License is distributed on an "AS IS" BASIS,
			
 
				 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# See the License for the specific language governing permissions and 
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -112,11 +112,11 @@ class MaskRCNN(BaseArch):
 
				                 body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
			
 
				 
			
 
				             # rescale the prediction back to origin image
			
 
				-            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				-                                                        im_shape, scale_factor)
			
 
				+            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
			
 
				+                bbox, bbox_num, im_shape, scale_factor)
			
 
				             origin_shape = self.bbox_post_process.get_origin_shape()
			
 
				-            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
			
 
				-                                               bbox_num, origin_shape)
			
 
				+            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
			
 
				+                                               origin_shape)
			
 
				             return bbox_pred, bbox_num, mask_pred
			
 
				 
			
 
				     def get_loss(self, ):
			
--- a/paddlers/models/ppdet/modeling/architectures/meta_arch.py
+++ b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
@@ -22,22 +22,23 @@ class BaseArch(nn.Layer):
 
				         self.fuse_norm = False
			
 
				 
			
 
				     def load_meanstd(self, cfg_transform):
			
 
				-        self.scale = 1.
			
 
				-        self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
			
 
				-            (1, 3, 1, 1))
			
 
				-        self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
			
 
				+        scale = 1.
			
 
				+        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
			
 
				+        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
			
 
				         for item in cfg_transform:
			
 
				             if 'NormalizeImage' in item:
			
 
				-                self.mean = paddle.to_tensor(item['NormalizeImage'][
			
 
				-                    'mean']).reshape((1, 3, 1, 1))
			
 
				-                self.std = paddle.to_tensor(item['NormalizeImage'][
			
 
				-                    'std']).reshape((1, 3, 1, 1))
			
 
				+                mean = np.array(
			
 
				+                    item['NormalizeImage']['mean'], dtype=np.float32)
			
 
				+                std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
			
 
				                 if item['NormalizeImage'].get('is_scale', True):
			
 
				-                    self.scale = 1. / 255.
			
 
				+                    scale = 1. / 255.
			
 
				                 break
			
 
				         if self.data_format == 'NHWC':
			
 
				-            self.mean = self.mean.reshape(1, 1, 1, 3)
			
 
				-            self.std = self.std.reshape(1, 1, 1, 3)
			
 
				+            self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
			
 
				+            self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
			
 
				+        else:
			
 
				+            self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
			
 
				+            self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))
			
 
				 
			
 
				     def forward(self, inputs):
			
 
				         if self.data_format == 'NHWC':
			
@@ -46,7 +47,7 @@ class BaseArch(nn.Layer):
 
				 
			
 
				         if self.fuse_norm:
			
 
				             image = inputs['image']
			
 
				-            self.inputs['image'] = (image * self.scale - self.mean) / self.std
			
 
				+            self.inputs['image'] = image * self.scale + self.bias
			
 
				             self.inputs['im_shape'] = inputs['im_shape']
			
 
				             self.inputs['scale_factor'] = inputs['scale_factor']
			
 
				         else:
			
@@ -63,10 +64,14 @@ class BaseArch(nn.Layer):
 
				                 inputs_list.append(inputs)
			
 
				             else:
			
 
				                 inputs_list.extend(inputs)
			
 
				-
			
 
				             outs = []
			
 
				             for inp in inputs_list:
			
 
				-                self.inputs = inp
			
 
				+                if self.fuse_norm:
			
 
				+                    self.inputs['image'] = inp['image'] * self.scale + self.bias
			
 
				+                    self.inputs['im_shape'] = inp['im_shape']
			
 
				+                    self.inputs['scale_factor'] = inp['scale_factor']
			
 
				+                else:
			
 
				+                    self.inputs = inp
			
 
				                 outs.append(self.get_pred())
			
 
				 
			
 
				             # multi-scale test
			
@@ -124,16 +129,3 @@ class BaseArch(nn.Layer):
 
				 
			
 
				     def get_pred(self, ):
			
 
				         raise NotImplementedError("Should implement get_pred method!")
			
 
				-
			
 
				-    @classmethod
			
 
				-    def convert_sync_batchnorm(cls, layer):
			
 
				-        layer_output = layer
			
 
				-        if getattr(layer, 'norm_type', None) == 'sync_bn':
			
 
				-            layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
			
 
				-        else:
			
 
				-            for name, sublayer in layer.named_children():
			
 
				-                layer_output.add_sublayer(name,
			
 
				-                                          cls.convert_sync_batchnorm(sublayer))
			
 
				-
			
 
				-        del layer
			
 
				-        return layer_output
			
--- a/paddlers/models/ppdet/modeling/architectures/picodet.py
+++ b/paddlers/models/ppdet/modeling/architectures/picodet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -41,7 +41,8 @@ class PicoDet(BaseArch):
 
				         self.backbone = backbone
			
 
				         self.neck = neck
			
 
				         self.head = head
			
 
				-        self.deploy = False
			
 
				+        self.export_post_process = True
			
 
				+        self.export_nms = True
			
 
				 
			
 
				     @classmethod
			
 
				     def from_config(cls, cfg, *args, **kwargs):
			
@@ -62,14 +63,13 @@ class PicoDet(BaseArch):
 
				     def _forward(self):
			
 
				         body_feats = self.backbone(self.inputs)
			
 
				         fpn_feats = self.neck(body_feats)
			
 
				-        head_outs = self.head(fpn_feats, self.deploy)
			
 
				-        if self.training or self.deploy:
			
 
				+        head_outs = self.head(fpn_feats, self.export_post_process)
			
 
				+        if self.training or not self.export_post_process:
			
 
				             return head_outs, None
			
 
				         else:
			
 
				-            im_shape = self.inputs['im_shape']
			
 
				             scale_factor = self.inputs['scale_factor']
			
 
				-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
			
 
				-                                                      scale_factor)
			
 
				+            bboxes, bbox_num = self.head.post_process(
			
 
				+                head_outs, scale_factor, export_nms=self.export_nms)
			
 
				             return bboxes, bbox_num
			
 
				 
			
 
				     def get_loss(self, ):
			
@@ -83,9 +83,13 @@ class PicoDet(BaseArch):
 
				         return loss
			
 
				 
			
 
				     def get_pred(self):
			
 
				-        if self.deploy:
			
 
				+        if not self.export_post_process:
			
 
				             return {'picodet': self._forward()[0]}
			
 
				-        else:
			
 
				+        elif self.export_nms:
			
 
				             bbox_pred, bbox_num = self._forward()
			
 
				             output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				             return output
			
 
				+        else:
			
 
				+            bboxes, mlvl_scores = self._forward()
			
 
				+            output = {'bbox': bboxes, 'scores': mlvl_scores}
			
 
				+            return output
			
--- a/paddlers/models/ppdet/modeling/architectures/retinanet.py
+++ b/paddlers/models/ppdet/modeling/architectures/retinanet.py
@@ -0,0 +1,68 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+import paddle
			
 
				+
			
 
				+__all__ = ['RetinaNet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class RetinaNet(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, backbone, neck, head):
			
 
				+        super(RetinaNet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            'head': head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        neck_feats = self.neck(body_feats)
			
 
				+
			
 
				+        if self.training:
			
 
				+            return self.head(neck_feats, self.inputs)
			
 
				+        else:
			
 
				+            head_outs = self.head(neck_feats)
			
 
				+            bbox, bbox_num = self.head.post_process(
			
 
				+                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
			
 
				+            return {'bbox': bbox, 'bbox_num': bbox_num}
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/s2anet.py
+++ b/paddlers/models/ppdet/modeling/architectures/s2anet.py
@@ -1,15 +1,15 @@
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -26,26 +26,21 @@ __all__ = ['S2ANet']
 
				 @register
			
 
				 class S2ANet(BaseArch):
			
 
				     __category__ = 'architecture'
			
 
				-    __inject__ = [
			
 
				-        's2anet_head',
			
 
				-        's2anet_bbox_post_process',
			
 
				-    ]
			
 
				+    __inject__ = ['head']
			
 
				 
			
 
				-    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
			
 
				+    def __init__(self, backbone, neck, head):
			
 
				         """
			
 
				         S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
			
 
				 
			
 
				         Args:
			
 
				             backbone (object): backbone instance
			
 
				             neck (object): `FPN` instance
			
 
				-            s2anet_head (object): `S2ANetHead` instance
			
 
				-            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
			
 
				+            head (object): `Head` instance
			
 
				         """
			
 
				         super(S2ANet, self).__init__()
			
 
				         self.backbone = backbone
			
 
				         self.neck = neck
			
 
				-        self.s2anet_head = s2anet_head
			
 
				-        self.s2anet_bbox_post_process = s2anet_bbox_post_process
			
 
				+        self.s2anet_head = head
			
 
				 
			
 
				     @classmethod
			
 
				     def from_config(cls, cfg, *args, **kwargs):
			
@@ -55,42 +50,28 @@ class S2ANet(BaseArch):
 
				 
			
 
				         out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				         kwargs = {'input_shape': out_shape}
			
 
				-        s2anet_head = create(cfg['s2anet_head'], **kwargs)
			
 
				-        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
			
 
				-                                          **kwargs)
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				 
			
 
				-        return {
			
 
				-            'backbone': backbone,
			
 
				-            'neck': neck,
			
 
				-            "s2anet_head": s2anet_head,
			
 
				-            "s2anet_bbox_post_process": s2anet_bbox_post_process,
			
 
				-        }
			
 
				+        return {'backbone': backbone, 'neck': neck, "head": head}
			
 
				 
			
 
				     def _forward(self):
			
 
				         body_feats = self.backbone(self.inputs)
			
 
				         if self.neck is not None:
			
 
				             body_feats = self.neck(body_feats)
			
 
				-        self.s2anet_head(body_feats)
			
 
				         if self.training:
			
 
				-            loss = self.s2anet_head.get_loss(self.inputs)
			
 
				-            total_loss = paddle.add_n(list(loss.values()))
			
 
				-            loss.update({'loss': total_loss})
			
 
				+            loss = self.s2anet_head(body_feats, self.inputs)
			
 
				             return loss
			
 
				         else:
			
 
				-            im_shape = self.inputs['im_shape']
			
 
				-            scale_factor = self.inputs['scale_factor']
			
 
				-            nms_pre = self.s2anet_bbox_post_process.nms_pre
			
 
				-            pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
			
 
				-
			
 
				+            head_outs = self.s2anet_head(body_feats)
			
 
				             # post_process
			
 
				-            pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
			
 
				-                                                                  pred_bboxes)
			
 
				+            bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
			
 
				             # rescale the prediction back to origin image
			
 
				-            pred_bboxes = self.s2anet_bbox_post_process.get_pred(
			
 
				-                pred_bboxes, bbox_num, im_shape, scale_factor)
			
 
				-
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
			
 
				+                                               scale_factor)
			
 
				             # output
			
 
				-            output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
			
 
				+            output = {'bbox': bboxes, 'bbox_num': bbox_num}
			
 
				             return output
			
 
				 
			
 
				     def get_loss(self, ):
			
--- a/paddlers/models/ppdet/modeling/architectures/ttfnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/ttfnet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
--- a/paddlers/models/ppdet/modeling/architectures/yolo.py
+++ b/paddlers/models/ppdet/modeling/architectures/yolo.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from __future__ import absolute_import
			
@@ -109,10 +109,13 @@ class YOLOv3(BaseArch):
 
				                 if self.return_idx:
			
 
				                     _, bbox, bbox_num, _ = self.post_process(
			
 
				                         yolo_head_outs, self.yolo_head.mask_anchors)
			
 
				-                else:
			
 
				+                elif self.post_process is not None:
			
 
				                     bbox, bbox_num = self.post_process(
			
 
				                         yolo_head_outs, self.yolo_head.mask_anchors,
			
 
				                         self.inputs['im_shape'], self.inputs['scale_factor'])
			
 
				+                else:
			
 
				+                    bbox, bbox_num = self.yolo_head.post_process(
			
 
				+                        yolo_head_outs, self.inputs['scale_factor'])
			
 
				                 output = {'bbox': bbox, 'bbox_num': bbox_num}
			
 
				 
			
 
				             return output
			
--- a/paddlers/models/ppdet/modeling/architectures/yolox.py
+++ b/paddlers/models/ppdet/modeling/architectures/yolox.py
@@ -0,0 +1,138 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+import random
			
 
				+import paddle
			
 
				+import paddle.nn.functional as F
			
 
				+import paddle.distributed as dist
			
 
				+
			
 
				+__all__ = ['YOLOX']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class YOLOX(BaseArch):
			
 
				+    """
			
 
				+    YOLOX network, see https://arxiv.org/abs/2107.08430
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (nn.Layer): backbone instance
			
 
				+        neck (nn.Layer): neck instance
			
 
				+        head (nn.Layer): head instance
			
 
				+        for_mot (bool): whether used for MOT or not
			
 
				+        input_size (list[int]): base input size [h, w]; the actual training size self._input_size is re-sampled in self._get_size()
			
 
				+        size_stride (int): stride of the size range
			
 
				+        size_range (list[int]): inclusive [min, max] range of the size factor for multi-scale training (size = size_stride * factor)
			
 
				+        random_interval (int): interval of iter to change self._input_size
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone='CSPDarkNet',
			
 
				+                 neck='YOLOCSPPAN',
			
 
				+                 head='YOLOXHead',
			
 
				+                 for_mot=False,
			
 
				+                 input_size=[640, 640],
			
 
				+                 size_stride=32,
			
 
				+                 size_range=[15, 25],
			
 
				+                 random_interval=10):
			
 
				+        super(YOLOX, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+        self.for_mot = for_mot
			
 
				+
			
 
				+        self.input_size = input_size
			
 
				+        self._input_size = paddle.to_tensor(input_size)
			
 
				+        self.size_stride = size_stride
			
 
				+        self.size_range = size_range
			
 
				+        self.random_interval = random_interval
			
 
				+        self._step = 0
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        # fpn
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        # head
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "head": head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        if self.training:
			
 
				+            self._preprocess()
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        neck_feats = self.neck(body_feats, self.for_mot)
			
 
				+
			
 
				+        if self.training:
			
 
				+            yolox_losses = self.head(neck_feats, self.inputs)
			
 
				+            yolox_losses.update({'size': self._input_size[0]})
			
 
				+            return yolox_losses
			
 
				+        else:
			
 
				+            head_outs = self.head(neck_feats)
			
 
				+            bbox, bbox_num = self.head.post_process(
			
 
				+                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
			
 
				+            return {'bbox': bbox, 'bbox_num': bbox_num}
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def _preprocess(self):
			
 
				+        # YOLOX multi-scale training, interpolate resize before inputs of the network.
			
 
				+        self._get_size()
			
 
				+        scale_y = self._input_size[0] / self.input_size[0]
			
 
				+        scale_x = self._input_size[1] / self.input_size[1]
			
 
				+        if scale_x != 1 or scale_y != 1:
			
 
				+            self.inputs['image'] = F.interpolate(
			
 
				+                self.inputs['image'],
			
 
				+                size=self._input_size,
			
 
				+                mode='bilinear',
			
 
				+                align_corners=False)
			
 
				+            gt_bboxes = self.inputs['gt_bbox']
			
 
				+            for i in range(len(gt_bboxes)):
			
 
				+                if len(gt_bboxes[i]) > 0:
			
 
				+                    gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x
			
 
				+                    gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y
			
 
				+            self.inputs['gt_bbox'] = gt_bboxes
			
 
				+
			
 
				+    def _get_size(self):
			
 
				+        # Re-sample self._input_size once every `random_interval` iterations (10 by default).
			
 
				+        image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
			
 
				+        if self._step % self.random_interval == 0:
			
 
				+            size_factor = random.randint(*self.size_range)
			
 
				+            size = [
			
 
				+                self.size_stride * size_factor,
			
 
				+                self.size_stride * int(size_factor * image_ratio)
			
 
				+            ]
			
 
				+            self._input_size = paddle.to_tensor(size)
			
 
				+        self._step += 1
			
--- a/paddlers/models/ppdet/modeling/assigners/__init__.py
+++ b/paddlers/models/ppdet/modeling/assigners/__init__.py
@@ -16,8 +16,10 @@ from . import utils
 
				 from . import task_aligned_assigner
			
 
				 from . import atss_assigner
			
 
				 from . import simota_assigner
			
 
				+from . import max_iou_assigner
			
 
				 
			
 
				 from .utils import *
			
 
				 from .task_aligned_assigner import *
			
 
				 from .atss_assigner import *
			
 
				 from .simota_assigner import *
			
 
				+from .max_iou_assigner import *
			
--- a/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
@@ -22,11 +22,13 @@ import paddle.nn as nn
 
				 import paddle.nn.functional as F
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import register
			
 
				-from ..ops import iou_similarity
			
 
				+from ..bbox_utils import iou_similarity, batch_iou_similarity
			
 
				 from ..bbox_utils import bbox_center
			
 
				-from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor,
			
 
				+from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
			
 
				                     compute_max_iou_gt)
			
 
				 
			
 
				+__all__ = ['ATSSAssigner']
			
 
				+
			
 
				 
			
 
				 @register
			
 
				 class ATSSAssigner(nn.Layer):
			
@@ -48,7 +50,6 @@ class ATSSAssigner(nn.Layer):
 
				 
			
 
				     def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
			
 
				                              pad_gt_mask):
			
 
				-        pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
			
 
				         gt2anchor_distances_list = paddle.split(
			
 
				             gt2anchor_distances, num_anchors_list, axis=-1)
			
 
				         num_anchors_index = np.cumsum(num_anchors_list).tolist()
			
@@ -58,15 +59,12 @@ class ATSSAssigner(nn.Layer):
 
				         for distances, anchors_index in zip(gt2anchor_distances_list,
			
 
				                                             num_anchors_index):
			
 
				             num_anchors = distances.shape[-1]
			
 
				-            topk_metrics, topk_idxs = paddle.topk(
			
 
				+            _, topk_idxs = paddle.topk(
			
 
				                 distances, self.topk, axis=-1, largest=False)
			
 
				             topk_idxs_list.append(topk_idxs + anchors_index)
			
 
				-            topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
			
 
				-                                     paddle.zeros_like(topk_idxs))
			
 
				-            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
			
 
				-            is_in_topk = paddle.where(is_in_topk > 1,
			
 
				-                                      paddle.zeros_like(is_in_topk), is_in_topk)
			
 
				-            is_in_topk_list.append(is_in_topk.astype(gt2anchor_distances.dtype))
			
 
				+            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
			
 
				+                axis=-2).astype(gt2anchor_distances.dtype)
			
 
				+            is_in_topk_list.append(is_in_topk * pad_gt_mask)
			
 
				         is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
			
 
				         topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
			
 
				         return is_in_topk_list, topk_idxs_list
			
@@ -77,8 +75,10 @@ class ATSSAssigner(nn.Layer):
 
				                 num_anchors_list,
			
 
				                 gt_labels,
			
 
				                 gt_bboxes,
			
 
				+                pad_gt_mask,
			
 
				                 bg_index,
			
 
				-                gt_scores=None):
			
 
				+                gt_scores=None,
			
 
				+                pred_bboxes=None):
			
 
				         r"""This code is based on
			
 
				             https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
			
 
				 
			
@@ -99,18 +99,18 @@ class ATSSAssigner(nn.Layer):
 
				             anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
			
 
				                     "xmin, xmax, ymin, ymax" format
			
 
				             num_anchors_list (List): num of anchors in each level
			
 
				-            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
			
 
				-            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
			
 
				+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
			
 
				             bg_index (int): background index
			
 
				-            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
			
 
				+            gt_scores (Tensor|None, float32): Score of gt_bboxes,
			
 
				                     shape(B, n, 1), if None, then it will initialize with one_hot label
			
 
				+            pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
			
 
				         Returns:
			
 
				             assigned_labels (Tensor): (B, L)
			
 
				             assigned_bboxes (Tensor): (B, L, 4)
			
 
				-            assigned_scores (Tensor): (B, L, C)
			
 
				+            assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
			
 
				         """
			
 
				-        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
			
 
				-            gt_labels, gt_bboxes, gt_scores)
			
 
				         assert gt_labels.ndim == gt_bboxes.ndim and \
			
 
				                gt_bboxes.ndim == 3
			
 
				 
			
@@ -119,7 +119,8 @@ class ATSSAssigner(nn.Layer):
 
				 
			
 
				         # negative batch
			
 
				         if num_max_boxes == 0:
			
 
				-            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
			
 
				+            assigned_labels = paddle.full(
			
 
				+                [batch_size, num_anchors], bg_index, dtype='int32')
			
 
				             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
			
 
				             assigned_scores = paddle.zeros(
			
 
				                 [batch_size, num_anchors, self.num_classes])
			
@@ -149,9 +150,8 @@ class ATSSAssigner(nn.Layer):
 
				         iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
			
 
				         iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
			
 
				                         iou_threshold.std(axis=-1, keepdim=True)
			
 
				-        is_in_topk = paddle.where(
			
 
				-            iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
			
 
				-            is_in_topk, paddle.zeros_like(is_in_topk))
			
 
				+        is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
			
 
				+                                  paddle.zeros_like(is_in_topk))
			
 
				 
			
 
				         # 6. check the positive sample's center in gt, [B, n, L]
			
 
				         is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
			
@@ -178,9 +178,6 @@ class ATSSAssigner(nn.Layer):
 
				                                          mask_positive)
			
 
				             mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				         assigned_gt_index = mask_positive.argmax(axis=-2)
			
 
				-        assert mask_positive_sum.max() == 1, \
			
 
				-            ("one anchor just assign one gt, but received not equals 1. "
			
 
				-             "Received: %f" % mask_positive_sum.max().item())
			
 
				 
			
 
				         # assigned target
			
 
				         batch_ind = paddle.arange(
			
@@ -197,10 +194,19 @@ class ATSSAssigner(nn.Layer):
 
				             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
			
 
				         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
			
 
				 
			
 
				-        assigned_scores = F.one_hot(assigned_labels, self.num_classes)
			
 
				-        if gt_scores is not None:
			
 
				+        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
			
 
				+        ind = list(range(self.num_classes + 1))
			
 
				+        ind.remove(bg_index)
			
 
				+        assigned_scores = paddle.index_select(
			
 
				+            assigned_scores, paddle.to_tensor(ind), axis=-1)
			
 
				+        if pred_bboxes is not None:
			
 
				+            # assigned iou
			
 
				+            ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
			
 
				+            ious = ious.max(axis=-2).unsqueeze(-1)
			
 
				+            assigned_scores *= ious
			
 
				+        elif gt_scores is not None:
			
 
				             gather_scores = paddle.gather(
			
 
				-                pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
			
 
				+                gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
			
 
				             gather_scores = gather_scores.reshape([batch_size, num_anchors])
			
 
				             gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
			
 
				                                          paddle.zeros_like(gather_scores))
			
--- a/paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/max_iou_assigner.py
@@ -0,0 +1,54 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from paddlers.models.ppdet.modeling.proposal_generator.target import label_box
			
 
				+
			
 
				+__all__ = ['MaxIoUAssigner']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class MaxIoUAssigner(object):
			
 
				+    """a standard bbox assigner based on max IoU, use ppdet's label_box 
			
 
				+    as backend.
			
 
				+    Args:
			
 
				+        positive_overlap (float): threshold for defining positive samples 
			
 
				+        negative_overlap (float): threshold for defining negative samples
			
 
				+        allow_low_quality (bool): whether to lower IoU thr if a GT poorly
			
 
				+            overlaps with candidate bboxes
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 positive_overlap,
			
 
				+                 negative_overlap,
			
 
				+                 allow_low_quality=True):
			
 
				+        self.positive_overlap = positive_overlap
			
 
				+        self.negative_overlap = negative_overlap
			
 
				+        self.allow_low_quality = allow_low_quality
			
 
				+
			
 
				+    def __call__(self, bboxes, gt_bboxes):
			
 
				+        matches, match_labels = label_box(
			
 
				+            bboxes,
			
 
				+            gt_bboxes,
			
 
				+            positive_overlap=self.positive_overlap,
			
 
				+            negative_overlap=self.negative_overlap,
			
 
				+            allow_low_quality=self.allow_low_quality,
			
 
				+            ignore_thresh=-1,
			
 
				+            is_crowd=None,
			
 
				+            assign_on_cpu=False)
			
 
				+        return matches, match_labels
			
--- a/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
@@ -115,7 +115,10 @@ class SimOTAAssigner(object):
 
				     def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
			
 
				         match_matrix = np.zeros_like(cost_matrix.numpy())
			
 
				         # select candidate topk ious for dynamic-k calculation
			
 
				-        topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
			
 
				+        topk_ious, _ = paddle.topk(
			
 
				+            pairwise_ious,
			
 
				+            min(self.candidate_topk, pairwise_ious.shape[0]),
			
 
				+            axis=0)
			
 
				         # calculate dynamic k for each gt
			
 
				         dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
			
 
				         for gt_idx in range(num_gt):
			
--- a/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -21,10 +21,12 @@ import paddle.nn as nn
 
				 import paddle.nn.functional as F
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import register
			
 
				-from ..bbox_utils import iou_similarity
			
 
				-from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes,
			
 
				+from ..bbox_utils import batch_iou_similarity
			
 
				+from .utils import (gather_topk_anchors, check_points_inside_bboxes,
			
 
				                     compute_max_iou_anchor)
			
 
				 
			
 
				+__all__ = ['TaskAlignedAssigner']
			
 
				+
			
 
				 
			
 
				 @register
			
 
				 class TaskAlignedAssigner(nn.Layer):
			
@@ -43,8 +45,10 @@ class TaskAlignedAssigner(nn.Layer):
 
				                 pred_scores,
			
 
				                 pred_bboxes,
			
 
				                 anchor_points,
			
 
				+                num_anchors_list,
			
 
				                 gt_labels,
			
 
				                 gt_bboxes,
			
 
				+                pad_gt_mask,
			
 
				                 bg_index,
			
 
				                 gt_scores=None):
			
 
				         r"""This code is based on
			
@@ -61,20 +65,18 @@ class TaskAlignedAssigner(nn.Layer):
 
				             pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
			
 
				             pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
			
 
				             anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
			
 
				-            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
			
 
				-            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            num_anchors_list (List): num of anchors in each level, shape(L)
			
 
				+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
			
 
				+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
			
 
				             bg_index (int): background index
			
 
				-            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
			
 
				-                    shape(B, n, 1), if None, then it will initialize with one_hot label
			
 
				+            gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
			
 
				         Returns:
			
 
				             assigned_labels (Tensor): (B, L)
			
 
				             assigned_bboxes (Tensor): (B, L, 4)
			
 
				             assigned_scores (Tensor): (B, L, C)
			
 
				         """
			
 
				         assert pred_scores.ndim == pred_bboxes.ndim
			
 
				-
			
 
				-        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
			
 
				-            gt_labels, gt_bboxes, gt_scores)
			
 
				         assert gt_labels.ndim == gt_bboxes.ndim and \
			
 
				                gt_bboxes.ndim == 3
			
 
				 
			
@@ -83,14 +85,15 @@ class TaskAlignedAssigner(nn.Layer):
 
				 
			
 
				         # negative batch
			
 
				         if num_max_boxes == 0:
			
 
				-            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
			
 
				+            assigned_labels = paddle.full(
			
 
				+                [batch_size, num_anchors], bg_index, dtype='int32')
			
 
				             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
			
 
				             assigned_scores = paddle.zeros(
			
 
				                 [batch_size, num_anchors, num_classes])
			
 
				             return assigned_labels, assigned_bboxes, assigned_scores
			
 
				 
			
 
				         # compute iou between gt and pred bbox, [B, n, L]
			
 
				-        ious = iou_similarity(gt_bboxes, pred_bboxes)
			
 
				+        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
			
 
				         # gather pred bboxes class score
			
 
				         pred_scores = pred_scores.transpose([0, 2, 1])
			
 
				         batch_ind = paddle.arange(
			
@@ -109,9 +112,7 @@ class TaskAlignedAssigner(nn.Layer):
 
				         # select topk largest alignment metrics pred bbox as candidates
			
 
				         # for each gt, [B, n, L]
			
 
				         is_in_topk = gather_topk_anchors(
			
 
				-            alignment_metrics * is_in_gts,
			
 
				-            self.topk,
			
 
				-            topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))
			
 
				+            alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
			
 
				 
			
 
				         # select positive sample, [B, n, L]
			
 
				         mask_positive = is_in_topk * is_in_gts * pad_gt_mask
			
@@ -127,9 +128,6 @@ class TaskAlignedAssigner(nn.Layer):
 
				                                          mask_positive)
			
 
				             mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				         assigned_gt_index = mask_positive.argmax(axis=-2)
			
 
				-        assert mask_positive_sum.max() == 1, \
			
 
				-            ("one anchor just assign one gt, but received not equals 1. "
			
 
				-             "Received: %f" % mask_positive_sum.max().item())
			
 
				 
			
 
				         # assigned target
			
 
				         assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
			
@@ -144,7 +142,11 @@ class TaskAlignedAssigner(nn.Layer):
 
				             gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
			
 
				         assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
			
 
				 
			
 
				-        assigned_scores = F.one_hot(assigned_labels, num_classes)
			
 
				+        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
			
 
				+        ind = list(range(num_classes + 1))
			
 
				+        ind.remove(bg_index)
			
 
				+        assigned_scores = paddle.index_select(
			
 
				+            assigned_scores, paddle.to_tensor(ind), axis=-1)
			
 
				         # rescale alignment metrics
			
 
				         alignment_metrics *= mask_positive
			
 
				         max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
			
--- a/paddlers/models/ppdet/modeling/assigners/utils.py
+++ b/paddlers/models/ppdet/modeling/assigners/utils.py
@@ -88,7 +88,7 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
 
				         largest (bool) : largest is a flag, if set to true,
			
 
				             algorithm will sort by descending order, otherwise sort by
			
 
				             ascending order. Default: True
			
 
				-        topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
			
 
				+        topk_mask (Tensor|None, float32): shape[B, n, 1], ignore bbox mask,
			
 
				             Default: None
			
 
				         eps (float): Default: 1e-9
			
 
				     Returns:
			
@@ -98,20 +98,22 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
 
				     topk_metrics, topk_idxs = paddle.topk(
			
 
				         metrics, topk, axis=-1, largest=largest)
			
 
				     if topk_mask is None:
			
 
				-        topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > eps).tile(
			
 
				-            [1, 1, topk])
			
 
				-    topk_idxs = paddle.where(topk_mask, topk_idxs, paddle.zeros_like(topk_idxs))
			
 
				-    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
			
 
				-    is_in_topk = paddle.where(is_in_topk > 1,
			
 
				-                              paddle.zeros_like(is_in_topk), is_in_topk)
			
 
				-    return is_in_topk.astype(metrics.dtype)
			
 
				+        topk_mask = (
			
 
				+            topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
			
 
				+    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
			
 
				+        axis=-2).astype(metrics.dtype)
			
 
				+    return is_in_topk * topk_mask
			
 
				 
			
 
				 
			
 
				-def check_points_inside_bboxes(points, bboxes, eps=1e-9):
			
 
				+def check_points_inside_bboxes(points,
			
 
				+                               bboxes,
			
 
				+                               center_radius_tensor=None,
			
 
				+                               eps=1e-9):
			
 
				     r"""
			
 
				     Args:
			
 
				         points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
			
 
				         bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
			
 
				+        center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
			
 
				         eps (float): Default: 1e-9
			
 
				     Returns:
			
 
				         is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
			
@@ -119,12 +121,28 @@ def check_points_inside_bboxes(points, bboxes, eps=1e-9):
 
				     points = points.unsqueeze([0, 1])
			
 
				     x, y = points.chunk(2, axis=-1)
			
 
				     xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
			
 
				+    # check whether `points` is in `bboxes`
			
 
				     l = x - xmin
			
 
				     t = y - ymin
			
 
				     r = xmax - x
			
 
				     b = ymax - y
			
 
				-    bbox_ltrb = paddle.concat([l, t, r, b], axis=-1)
			
 
				-    return (bbox_ltrb.min(axis=-1) > eps).astype(bboxes.dtype)
			
 
				+    delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
			
 
				+    is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
			
 
				+    if center_radius_tensor is not None:
			
 
				+        # check whether `points` is in `center_radius`
			
 
				+        center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
			
 
				+        cx = (xmin + xmax) * 0.5
			
 
				+        cy = (ymin + ymax) * 0.5
			
 
				+        l = x - (cx - center_radius_tensor)
			
 
				+        t = y - (cy - center_radius_tensor)
			
 
				+        r = (cx + center_radius_tensor) - x
			
 
				+        b = (cy + center_radius_tensor) - y
			
 
				+        delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
			
 
				+        is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
			
 
				+        return (paddle.logical_and(is_in_bboxes, is_in_center),
			
 
				+                paddle.logical_or(is_in_bboxes, is_in_center))
			
 
				+
			
 
				+    return is_in_bboxes.astype(bboxes.dtype)
			
 
				 
			
 
				 
			
 
				 def compute_max_iou_anchor(ious):
			
@@ -158,7 +176,8 @@ def compute_max_iou_gt(ious):
 
				 def generate_anchors_for_grid_cell(feats,
			
 
				                                    fpn_strides,
			
 
				                                    grid_cell_size=5.0,
			
 
				-                                   grid_cell_offset=0.5):
			
 
				+                                   grid_cell_offset=0.5,
			
 
				+                                   dtype='float32'):
			
 
				     r"""
			
 
				     Like ATSS, generate anchors based on grid size.
			
 
				     Args:
			
@@ -167,14 +186,16 @@ def generate_anchors_for_grid_cell(feats,
 
				         grid_cell_size (float): anchor size
			
 
				         grid_cell_offset (float): The range is between 0 and 1.
			
 
				     Returns:
			
 
				-        anchors (List[Tensor]): shape[s, (l, 4)]
			
 
				-        num_anchors_list (List[int]): shape[s]
			
 
				-        stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
			
 
				+        anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
			
 
				+        anchor_points (Tensor): shape[l, 2], "x, y" format.
			
 
				+        num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
			
 
				+        stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
			
 
				     """
			
 
				     assert len(feats) == len(fpn_strides)
			
 
				     anchors = []
			
 
				+    anchor_points = []
			
 
				     num_anchors_list = []
			
 
				-    stride_tensor_list = []
			
 
				+    stride_tensor = []
			
 
				     for feat, stride in zip(feats, fpn_strides):
			
 
				         _, _, h, w = feat.shape
			
 
				         cell_half_size = grid_cell_size * stride * 0.5
			
@@ -186,9 +207,19 @@ def generate_anchors_for_grid_cell(feats,
 
				                 shift_x - cell_half_size, shift_y - cell_half_size,
			
 
				                 shift_x + cell_half_size, shift_y + cell_half_size
			
 
				             ],
			
 
				-            axis=-1).astype(feat.dtype)
			
 
				+            axis=-1).astype(dtype)
			
 
				+        anchor_point = paddle.stack([shift_x, shift_y], axis=-1).astype(dtype)
			
 
				+
			
 
				         anchors.append(anchor.reshape([-1, 4]))
			
 
				+        anchor_points.append(anchor_point.reshape([-1, 2]))
			
 
				         num_anchors_list.append(len(anchors[-1]))
			
 
				-        stride_tensor_list.append(
			
 
				-            paddle.full([num_anchors_list[-1], 1], stride))
			
 
				-    return anchors, num_anchors_list, stride_tensor_list
			
 
				+        stride_tensor.append(
			
 
				+            paddle.full(
			
 
				+                [num_anchors_list[-1], 1], stride, dtype=dtype))
			
 
				+    anchors = paddle.concat(anchors)
			
 
				+    anchors.stop_gradient = True
			
 
				+    anchor_points = paddle.concat(anchor_points)
			
 
				+    anchor_points.stop_gradient = True
			
 
				+    stride_tensor = paddle.concat(stride_tensor)
			
 
				+    stride_tensor.stop_gradient = True
			
 
				+    return anchors, anchor_points, num_anchors_list, stride_tensor
			
--- a/paddlers/models/ppdet/modeling/backbones/__init__.py
+++ b/paddlers/models/ppdet/modeling/backbones/__init__.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 from . import vgg
			
@@ -29,6 +29,11 @@ from . import swin_transformer
 
				 from . import lcnet
			
 
				 from . import hardnet
			
 
				 from . import esnet
			
 
				+from . import cspresnet
			
 
				+from . import csp_darknet
			
 
				+from . import convnext
			
 
				+from . import vision_transformer
			
 
				+from . import mobileone
			
 
				 
			
 
				 from .vgg import *
			
 
				 from .resnet import *
			
@@ -47,3 +52,9 @@ from .swin_transformer import *
 
				 from .lcnet import *
			
 
				 from .hardnet import *
			
 
				 from .esnet import *
			
 
				+from .cspresnet import *
			
 
				+from .csp_darknet import *
			
 
				+from .convnext import *
			
 
				+from .vision_transformer import *
			
 
				+from .vision_transformer import *
			
 
				+from .mobileone import *
			
--- a/paddlers/models/ppdet/modeling/backbones/blazenet.py
+++ b/paddlers/models/ppdet/modeling/backbones/blazenet.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
--- a/paddlers/models/ppdet/modeling/backbones/convnext.py
+++ b/paddlers/models/ppdet/modeling/backbones/convnext.py
@@ -0,0 +1,245 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				+# limitations under the License.
			
 
				+'''
			
 
				+Modified from https://github.com/facebookresearch/ConvNeXt
			
 
				+Copyright (c) Meta Platforms, Inc. and affiliates.
			
 
				+All rights reserved.
			
 
				+This source code is licensed under the license found in the
			
 
				+LICENSE file in the root directory of this source tree.
			
 
				+'''
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn.initializer import Constant
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from .transformer_utils import DropPath, trunc_normal_, zeros_
			
 
				+
			
 
				+__all__ = ['ConvNeXt']
			
 
				+
			
 
				+
			
 
				+class Block(nn.Layer):
			
 
				+    r""" ConvNeXt Block. There are two equivalent implementations:
			
 
				+    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
			
 
				+    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
			
 
				+    We use (2), which the original PyTorch implementation found slightly faster
			
 
				+    
			
 
				+    Args:
			
 
				+        dim (int): Number of input channels.
			
 
				+        drop_path (float): Stochastic depth rate. Default: 0.0
			
 
				+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
			
 
				+        super().__init__()
			
 
				+        self.dwconv = nn.Conv2D(
			
 
				+            dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
			
 
				+        self.norm = LayerNorm(dim, eps=1e-6)
			
 
				+        self.pwconv1 = nn.Linear(
			
 
				+            dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
			
 
				+        self.act = nn.GELU()
			
 
				+        self.pwconv2 = nn.Linear(4 * dim, dim)
			
 
				+
			
 
				+        if layer_scale_init_value > 0:
			
 
				+            self.gamma = self.create_parameter(
			
 
				+                shape=(dim, ),
			
 
				+                attr=ParamAttr(initializer=Constant(layer_scale_init_value)))
			
 
				+        else:
			
 
				+            self.gamma = None
			
 
				+
			
 
				+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity(
			
 
				+        )
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        input = x
			
 
				+        x = self.dwconv(x)
			
 
				+        x = x.transpose([0, 2, 3, 1])
			
 
				+        x = self.norm(x)
			
 
				+        x = self.pwconv1(x)
			
 
				+        x = self.act(x)
			
 
				+        x = self.pwconv2(x)
			
 
				+        if self.gamma is not None:
			
 
				+            x = self.gamma * x
			
 
				+        x = x.transpose([0, 3, 1, 2])
			
 
				+        x = input + self.drop_path(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class LayerNorm(nn.Layer):
			
 
				+    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 
			
 
				+    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 
			
 
				+    shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
			
 
				+    with shape (batch_size, channels, height, width).
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
			
 
				+        super().__init__()
			
 
				+
			
 
				+        self.weight = self.create_parameter(
			
 
				+            shape=(normalized_shape, ),
			
 
				+            attr=ParamAttr(initializer=Constant(1.)))
			
 
				+        self.bias = self.create_parameter(
			
 
				+            shape=(normalized_shape, ),
			
 
				+            attr=ParamAttr(initializer=Constant(0.)))
			
 
				+
			
 
				+        self.eps = eps
			
 
				+        self.data_format = data_format
			
 
				+        if self.data_format not in ["channels_last", "channels_first"]:
			
 
				+            raise NotImplementedError
			
 
				+        self.normalized_shape = (normalized_shape, )
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        if self.data_format == "channels_last":
			
 
				+            return F.layer_norm(x, self.normalized_shape, self.weight,
			
 
				+                                self.bias, self.eps)
			
 
				+        elif self.data_format == "channels_first":
			
 
				+            u = x.mean(1, keepdim=True)
			
 
				+            s = (x - u).pow(2).mean(1, keepdim=True)
			
 
				+            x = (x - u) / paddle.sqrt(s + self.eps)
			
 
				+            x = self.weight[:, None, None] * x + self.bias[:, None, None]
			
 
				+            return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class ConvNeXt(nn.Layer):
			
 
				+    r""" ConvNeXt
			
 
				+        A PaddlePaddle impl of : `A ConvNet for the 2020s`  -
			
 
				+          https://arxiv.org/pdf/2201.03545.pdf
			
 
				+
			
 
				+    Args:
			
 
				+        in_chans (int): Number of input image channels. Default: 3
			
 
				+        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
			
 
				+        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
			
 
				+        drop_path_rate (float): Stochastic depth rate. Default: 0.
			
 
				+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
			
 
				+    """
			
 
				+
			
 
				+    arch_settings = {
			
 
				+        'tiny': {
			
 
				+            'depths': [3, 3, 9, 3],
			
 
				+            'dims': [96, 192, 384, 768]
			
 
				+        },
			
 
				+        'small': {
			
 
				+            'depths': [3, 3, 27, 3],
			
 
				+            'dims': [96, 192, 384, 768]
			
 
				+        },
			
 
				+        'base': {
			
 
				+            'depths': [3, 3, 27, 3],
			
 
				+            'dims': [128, 256, 512, 1024]
			
 
				+        },
			
 
				+        'large': {
			
 
				+            'depths': [3, 3, 27, 3],
			
 
				+            'dims': [192, 384, 768, 1536]
			
 
				+        },
			
 
				+        'xlarge': {
			
 
				+            'depths': [3, 3, 27, 3],
			
 
				+            'dims': [256, 512, 1024, 2048]
			
 
				+        },
			
 
				+    }
			
 
				+
			
 
				+    def __init__(
			
 
				+            self,
			
 
				+            arch='tiny',
			
 
				+            in_chans=3,
			
 
				+            drop_path_rate=0.,
			
 
				+            layer_scale_init_value=1e-6,
			
 
				+            return_idx=[1, 2, 3],
			
 
				+            norm_output=True,
			
 
				+            pretrained=None, ):
			
 
				+        super().__init__()
			
 
				+        depths = self.arch_settings[arch]['depths']
			
 
				+        dims = self.arch_settings[arch]['dims']
			
 
				+        self.downsample_layers = nn.LayerList(
			
 
				+        )  # stem and 3 intermediate downsampling conv layers
			
 
				+        stem = nn.Sequential(
			
 
				+            nn.Conv2D(
			
 
				+                in_chans, dims[0], kernel_size=4, stride=4),
			
 
				+            LayerNorm(
			
 
				+                dims[0], eps=1e-6, data_format="channels_first"))
			
 
				+        self.downsample_layers.append(stem)
			
 
				+        for i in range(3):
			
 
				+            downsample_layer = nn.Sequential(
			
 
				+                LayerNorm(
			
 
				+                    dims[i], eps=1e-6, data_format="channels_first"),
			
 
				+                nn.Conv2D(
			
 
				+                    dims[i], dims[i + 1], kernel_size=2, stride=2), )
			
 
				+            self.downsample_layers.append(downsample_layer)
			
 
				+
			
 
				+        self.stages = nn.LayerList(
			
 
				+        )  # 4 feature resolution stages, each consisting of multiple residual blocks
			
 
				+        dp_rates = [x for x in np.linspace(0, drop_path_rate, sum(depths))]
			
 
				+        cur = 0
			
 
				+        for i in range(4):
			
 
				+            stage = nn.Sequential(*[
			
 
				+                Block(
			
 
				+                    dim=dims[i],
			
 
				+                    drop_path=dp_rates[cur + j],
			
 
				+                    layer_scale_init_value=layer_scale_init_value)
			
 
				+                for j in range(depths[i])
			
 
				+            ])
			
 
				+            self.stages.append(stage)
			
 
				+            cur += depths[i]
			
 
				+
			
 
				+        self.return_idx = return_idx
			
 
				+        self.dims = [dims[i] for i in return_idx]  # [::-1]
			
 
				+
			
 
				+        self.norm_output = norm_output
			
 
				+        if norm_output:
			
 
				+            self.norms = nn.LayerList([
			
 
				+                LayerNorm(
			
 
				+                    c, eps=1e-6, data_format="channels_first")
			
 
				+                for c in self.dims
			
 
				+            ])
			
 
				+
			
 
				+        self.apply(self._init_weights)
			
 
				+
			
 
				+        if pretrained is not None:
			
 
				+            if 'http' in pretrained:  #URL
			
 
				+                path = paddle.utils.download.get_weights_path_from_url(
			
 
				+                    pretrained)
			
 
				+            else:  #model in local path
			
 
				+                path = pretrained
			
 
				+            self.set_state_dict(paddle.load(path))
			
 
				+
			
 
				+    def _init_weights(self, m):
			
 
				+        if isinstance(m, (nn.Conv2D, nn.Linear)):
			
 
				+            trunc_normal_(m.weight)
			
 
				+            zeros_(m.bias)
			
 
				+
			
 
				+    def forward_features(self, x):
			
 
				+        output = []
			
 
				+        for i in range(4):
			
 
				+            x = self.downsample_layers[i](x)
			
 
				+            x = self.stages[i](x)
			
 
				+            output.append(x)
			
 
				+
			
 
				+        outputs = [output[i] for i in self.return_idx]
			
 
				+        if self.norm_output:
			
 
				+            outputs = [self.norms[i](out) for i, out in enumerate(outputs)]
			
 
				+
			
 
				+        return outputs
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.forward_features(x['image'])
			
 
				+        return x
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self.dims]
			
--- a/paddlers/models/ppdet/modeling/backbones/csp_darknet.py
+++ b/paddlers/models/ppdet/modeling/backbones/csp_darknet.py
@@ -0,0 +1,404 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+# 
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.modeling.initializer import conv_init_
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = [
			
 
				+    'CSPDarkNet', 'BaseConv', 'DWConv', 'BottleNeck', 'SPPLayer', 'SPPFLayer'
			
 
				+]
			
 
				+
			
 
				+
			
 
				+class BaseConv(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 ksize,
			
 
				+                 stride,
			
 
				+                 groups=1,
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(BaseConv, self).__init__()
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            kernel_size=ksize,
			
 
				+            stride=stride,
			
 
				+            padding=(ksize - 1) // 2,
			
 
				+            groups=groups,
			
 
				+            bias_attr=bias)
			
 
				+        self.bn = nn.BatchNorm2D(
			
 
				+            out_channels,
			
 
				+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
			
 
				+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
			
 
				+
			
 
				+        self._init_weights()
			
 
				+
			
 
				+    def _init_weights(self):
			
 
				+        conv_init_(self.conv)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        # use 'x * F.sigmoid(x)' instead of 'silu'
			
 
				+        x = self.bn(self.conv(x))
			
 
				+        y = x * F.sigmoid(x)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+class DWConv(nn.Layer):
			
 
				+    """Depthwise Conv"""
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 ksize,
			
 
				+                 stride=1,
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(DWConv, self).__init__()
			
 
				+        self.dw_conv = BaseConv(
			
 
				+            in_channels,
			
 
				+            in_channels,
			
 
				+            ksize=ksize,
			
 
				+            stride=stride,
			
 
				+            groups=in_channels,
			
 
				+            bias=bias,
			
 
				+            act=act)
			
 
				+        self.pw_conv = BaseConv(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            ksize=1,
			
 
				+            stride=1,
			
 
				+            groups=1,
			
 
				+            bias=bias,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        return self.pw_conv(self.dw_conv(x))
			
 
				+
			
 
				+
			
 
				+class Focus(nn.Layer):
			
 
				+    """Focus width and height information into channel space, used in YOLOX."""
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 ksize=3,
			
 
				+                 stride=1,
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(Focus, self).__init__()
			
 
				+        self.conv = BaseConv(
			
 
				+            in_channels * 4,
			
 
				+            out_channels,
			
 
				+            ksize=ksize,
			
 
				+            stride=stride,
			
 
				+            bias=bias,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        # inputs [bs, C, H, W] -> outputs [bs, 4C, H/2, W/2]
			
 
				+        top_left = inputs[:, :, 0::2, 0::2]
			
 
				+        top_right = inputs[:, :, 0::2, 1::2]
			
 
				+        bottom_left = inputs[:, :, 1::2, 0::2]
			
 
				+        bottom_right = inputs[:, :, 1::2, 1::2]
			
 
				+        outputs = paddle.concat(
			
 
				+            [top_left, bottom_left, top_right, bottom_right], 1)
			
 
				+        return self.conv(outputs)
			
 
				+
			
 
				+
			
 
				+class BottleNeck(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 shortcut=True,
			
 
				+                 expansion=0.5,
			
 
				+                 depthwise=False,
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(BottleNeck, self).__init__()
			
 
				+        hidden_channels = int(out_channels * expansion)
			
 
				+        Conv = DWConv if depthwise else BaseConv
			
 
				+        self.conv1 = BaseConv(
			
 
				+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+        self.conv2 = Conv(
			
 
				+            hidden_channels,
			
 
				+            out_channels,
			
 
				+            ksize=3,
			
 
				+            stride=1,
			
 
				+            bias=bias,
			
 
				+            act=act)
			
 
				+        self.add_shortcut = shortcut and in_channels == out_channels
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        y = self.conv2(self.conv1(x))
			
 
				+        if self.add_shortcut:
			
 
				+            y = y + x
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+class SPPLayer(nn.Layer):
			
 
				+    """Spatial Pyramid Pooling (SPP) layer used in YOLOv3-SPP and YOLOX"""
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 kernel_sizes=(5, 9, 13),
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(SPPLayer, self).__init__()
			
 
				+        hidden_channels = in_channels // 2
			
 
				+        self.conv1 = BaseConv(
			
 
				+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+        self.maxpoolings = nn.LayerList([
			
 
				+            nn.MaxPool2D(
			
 
				+                kernel_size=ks, stride=1, padding=ks // 2)
			
 
				+            for ks in kernel_sizes
			
 
				+        ])
			
 
				+        conv2_channels = hidden_channels * (len(kernel_sizes) + 1)
			
 
				+        self.conv2 = BaseConv(
			
 
				+            conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv1(x)
			
 
				+        x = paddle.concat([x] + [mp(x) for mp in self.maxpoolings], axis=1)
			
 
				+        x = self.conv2(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class SPPFLayer(nn.Layer):
			
 
				+    """ Spatial Pyramid Pooling - Fast (SPPF) layer used in YOLOv5 by Glenn Jocher,
			
 
				+        equivalent to SPP(k=(5, 9, 13))
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 ksize=5,
			
 
				+                 bias=False,
			
 
				+                 act='silu'):
			
 
				+        super(SPPFLayer, self).__init__()
			
 
				+        hidden_channels = in_channels // 2
			
 
				+        self.conv1 = BaseConv(
			
 
				+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+        self.maxpooling = nn.MaxPool2D(
			
 
				+            kernel_size=ksize, stride=1, padding=ksize // 2)
			
 
				+        conv2_channels = hidden_channels * 4
			
 
				+        self.conv2 = BaseConv(
			
 
				+            conv2_channels, out_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv1(x)
			
 
				+        y1 = self.maxpooling(x)
			
 
				+        y2 = self.maxpooling(y1)
			
 
				+        y3 = self.maxpooling(y2)
			
 
				+        concats = paddle.concat([x, y1, y2, y3], axis=1)
			
 
				+        out = self.conv2(concats)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class CSPLayer(nn.Layer):
			
 
				+    """CSP (Cross Stage Partial) layer with 3 convs, named C3 in YOLOv5"""
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 num_blocks=1,
			
 
				+                 shortcut=True,
			
 
				+                 expansion=0.5,
			
 
				+                 depthwise=False,
			
 
				+                 bias=False,
			
 
				+                 act="silu"):
			
 
				+        super(CSPLayer, self).__init__()
			
 
				+        hidden_channels = int(out_channels * expansion)
			
 
				+        self.conv1 = BaseConv(
			
 
				+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+        self.conv2 = BaseConv(
			
 
				+            in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
			
 
				+        self.bottlenecks = nn.Sequential(*[
			
 
				+            BottleNeck(
			
 
				+                hidden_channels,
			
 
				+                hidden_channels,
			
 
				+                shortcut=shortcut,
			
 
				+                expansion=1.0,
			
 
				+                depthwise=depthwise,
			
 
				+                bias=bias,
			
 
				+                act=act) for _ in range(num_blocks)
			
 
				+        ])
			
 
				+        self.conv3 = BaseConv(
			
 
				+            hidden_channels * 2,
			
 
				+            out_channels,
			
 
				+            ksize=1,
			
 
				+            stride=1,
			
 
				+            bias=bias,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x_1 = self.conv1(x)
			
 
				+        x_1 = self.bottlenecks(x_1)
			
 
				+        x_2 = self.conv2(x)
			
 
				+        x = paddle.concat([x_1, x_2], axis=1)
			
 
				+        x = self.conv3(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class CSPDarkNet(nn.Layer):
			
 
				+    """
			
 
				+    CSPDarkNet backbone.
			
 
				+    Args:
			
 
				+        arch (str): Architecture of CSPDarkNet, from {P5, P6, X}, default as X,
			
 
				+            and 'X' means used in YOLOX, 'P5/P6' means used in YOLOv5.
			
 
				+        depth_mult (float): Depth multiplier, multiply number of blocks in
			
 
				+            CSPLayer, default as 1.0.
			
 
				+        width_mult (float): Width multiplier, multiply number of channels in
			
 
				+            each layer, default as 1.0.
			
 
				+        depthwise (bool): Whether to use depth-wise conv layer.
			
 
				+        act (str): Activation function type, default as 'silu'.
			
 
				+        return_idx (list): Index of stages whose feature maps are returned.
			
 
				+    """
			
 
				+
			
 
				+    __shared__ = ['depth_mult', 'width_mult', 'act', 'trt']
			
 
				+
			
 
				+    # in_channels, out_channels, num_blocks, add_shortcut, use_spp(use_sppf)
			
 
				+    # 'X' means setting used in YOLOX, 'P5/P6' means setting used in YOLOv5.
			
 
				+    arch_settings = {
			
 
				+        'X': [[64, 128, 3, True, False], [128, 256, 9, True, False],
			
 
				+              [256, 512, 9, True, False], [512, 1024, 3, False, True]],
			
 
				+        'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False],
			
 
				+               [256, 512, 9, True, False], [512, 1024, 3, True, True]],
			
 
				+        'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False],
			
 
				+               [256, 512, 9, True, False], [512, 768, 3, True, False],
			
 
				+               [768, 1024, 3, True, True]],
			
 
				+    }
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 arch='X',
			
 
				+                 depth_mult=1.0,
			
 
				+                 width_mult=1.0,
			
 
				+                 depthwise=False,
			
 
				+                 act='silu',
			
 
				+                 trt=False,
			
 
				+                 return_idx=[2, 3, 4]):
			
 
				+        super(CSPDarkNet, self).__init__()
			
 
				+        self.arch = arch
			
 
				+        self.return_idx = return_idx
			
 
				+        Conv = DWConv if depthwise else BaseConv
			
 
				+        arch_setting = self.arch_settings[arch]
			
 
				+        base_channels = int(arch_setting[0][0] * width_mult)
			
 
				+
			
 
				+        # Note: differences between the latest YOLOv5 and the original YOLOX
			
 
				+        # 1. self.stem, use Conv stem(in YOLOv5) or Focus stem(in YOLOX)
			
 
				+        # 2. use SPPF(in YOLOv5) or SPP(in YOLOX)
			
 
				+        # 3. put SPPF before(YOLOv5) or SPP after(YOLOX) the last cspdark block's CSPLayer
			
 
				+        # 4. whether the CSPLayer in the SPPF(SPP) stage adds shortcut, True in YOLOv5, False in YOLOX
			
 
				+        if arch in ['P5', 'P6']:
			
 
				+            # in the latest YOLOv5, use Conv stem, and SPPF (fast, only single spp kernel size)
			
 
				+            self.stem = Conv(
			
 
				+                3, base_channels, ksize=6, stride=2, bias=False, act=act)
			
 
				+            spp_kernal_sizes = 5
			
 
				+        elif arch in ['X']:
			
 
				+            # in the original YOLOX, use Focus stem, and SPP (three spp kernel sizes)
			
 
				+            self.stem = Focus(
			
 
				+                3, base_channels, ksize=3, stride=1, bias=False, act=act)
			
 
				+            spp_kernal_sizes = (5, 9, 13)
			
 
				+        else:
			
 
				+            raise AttributeError("Unsupported arch type: {}".format(arch))
			
 
				+
			
 
				+        _out_channels = [base_channels]
			
 
				+        layers_num = 1
			
 
				+        self.csp_dark_blocks = []
			
 
				+
			
 
				+        for i, (in_channels, out_channels, num_blocks, shortcut,
			
 
				+                use_spp) in enumerate(arch_setting):
			
 
				+            in_channels = int(in_channels * width_mult)
			
 
				+            out_channels = int(out_channels * width_mult)
			
 
				+            _out_channels.append(out_channels)
			
 
				+            num_blocks = max(round(num_blocks * depth_mult), 1)
			
 
				+            stage = []
			
 
				+
			
 
				+            conv_layer = self.add_sublayer(
			
 
				+                'layers{}.stage{}.conv_layer'.format(layers_num, i + 1),
			
 
				+                Conv(
			
 
				+                    in_channels, out_channels, 3, 2, bias=False, act=act))
			
 
				+            stage.append(conv_layer)
			
 
				+            layers_num += 1
			
 
				+
			
 
				+            if use_spp and arch in ['X']:
			
 
				+                # in YOLOX use SPPLayer
			
 
				+                spp_layer = self.add_sublayer(
			
 
				+                    'layers{}.stage{}.spp_layer'.format(layers_num, i + 1),
			
 
				+                    SPPLayer(
			
 
				+                        out_channels,
			
 
				+                        out_channels,
			
 
				+                        kernel_sizes=spp_kernal_sizes,
			
 
				+                        bias=False,
			
 
				+                        act=act))
			
 
				+                stage.append(spp_layer)
			
 
				+                layers_num += 1
			
 
				+
			
 
				+            csp_layer = self.add_sublayer(
			
 
				+                'layers{}.stage{}.csp_layer'.format(layers_num, i + 1),
			
 
				+                CSPLayer(
			
 
				+                    out_channels,
			
 
				+                    out_channels,
			
 
				+                    num_blocks=num_blocks,
			
 
				+                    shortcut=shortcut,
			
 
				+                    depthwise=depthwise,
			
 
				+                    bias=False,
			
 
				+                    act=act))
			
 
				+            stage.append(csp_layer)
			
 
				+            layers_num += 1
			
 
				+
			
 
				+            if use_spp and arch in ['P5', 'P6']:
			
 
				+                # in latest YOLOv5 use SPPFLayer instead of SPPLayer
			
 
				+                sppf_layer = self.add_sublayer(
			
 
				+                    'layers{}.stage{}.sppf_layer'.format(layers_num, i + 1),
			
 
				+                    SPPFLayer(
			
 
				+                        out_channels,
			
 
				+                        out_channels,
			
 
				+                        ksize=5,
			
 
				+                        bias=False,
			
 
				+                        act=act))
			
 
				+                stage.append(sppf_layer)
			
 
				+                layers_num += 1
			
 
				+
			
 
				+            self.csp_dark_blocks.append(nn.Sequential(*stage))
			
 
				+
			
 
				+        self._out_channels = [_out_channels[i] for i in self.return_idx]
			
 
				+        self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        outputs = []
			
 
				+        x = self.stem(x)
			
 
				+        for i, layer in enumerate(self.csp_dark_blocks):
			
 
				+            x = layer(x)
			
 
				+            if i + 1 in self.return_idx:
			
 
				+                outputs.append(x)
			
 
				+        return outputs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [
			
 
				+            ShapeSpec(
			
 
				+                channels=c, stride=s)
			
 
				+            for c, s in zip(self._out_channels, self.strides)
			
 
				+        ]
			
--- a/paddlers/models/ppdet/modeling/backbones/cspresnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/cspresnet.py
@@ -0,0 +1,321 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import Constant
			
 
				+
			
 
				+from paddlers.models.ppdet.modeling.ops import get_act_fn
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['CSPResNet', 'BasicBlock', 'EffectiveSELayer', 'ConvBNLayer']
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size=3,
			
 
				+                 stride=1,
			
 
				+                 groups=1,
			
 
				+                 padding=0,
			
 
				+                 act=None):
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels=ch_in,
			
 
				+            out_channels=ch_out,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=groups,
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        self.bn = nn.BatchNorm2D(
			
 
				+            ch_out,
			
 
				+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
			
 
				+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
			
 
				+        self.act = get_act_fn(act) if act is None or isinstance(act, (
			
 
				+            str, dict)) else act
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv(x)
			
 
				+        x = self.bn(x)
			
 
				+        x = self.act(x)
			
 
				+
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class RepVggBlock(nn.Layer):
			
 
				+    def __init__(self, ch_in, ch_out, act='relu', alpha=False):
			
 
				+        super(RepVggBlock, self).__init__()
			
 
				+        self.ch_in = ch_in
			
 
				+        self.ch_out = ch_out
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            ch_in, ch_out, 3, stride=1, padding=1, act=None)
			
 
				+        self.conv2 = ConvBNLayer(
			
 
				+            ch_in, ch_out, 1, stride=1, padding=0, act=None)
			
 
				+        self.act = get_act_fn(act) if act is None or isinstance(act, (
			
 
				+            str, dict)) else act
			
 
				+        if alpha:
			
 
				+            self.alpha = self.create_parameter(
			
 
				+                shape=[1],
			
 
				+                attr=ParamAttr(initializer=Constant(value=1.)),
			
 
				+                dtype="float32")
			
 
				+        else:
			
 
				+            self.alpha = None
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        if hasattr(self, 'conv'):
			
 
				+            y = self.conv(x)
			
 
				+        else:
			
 
				+            if self.alpha:
			
 
				+                y = self.conv1(x) + self.alpha * self.conv2(x)
			
 
				+            else:
			
 
				+                y = self.conv1(x) + self.conv2(x)
			
 
				+        y = self.act(y)
			
 
				+        return y
			
 
				+
			
 
				+    def convert_to_deploy(self):
			
 
				+        if not hasattr(self, 'conv'):
			
 
				+            self.conv = nn.Conv2D(
			
 
				+                in_channels=self.ch_in,
			
 
				+                out_channels=self.ch_out,
			
 
				+                kernel_size=3,
			
 
				+                stride=1,
			
 
				+                padding=1,
			
 
				+                groups=1)
			
 
				+        kernel, bias = self.get_equivalent_kernel_bias()
			
 
				+        self.conv.weight.set_value(kernel)
			
 
				+        self.conv.bias.set_value(bias)
			
 
				+        self.__delattr__('conv1')
			
 
				+        self.__delattr__('conv2')
			
 
				+
			
 
				+    def get_equivalent_kernel_bias(self):
			
 
				+        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
			
 
				+        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
			
 
				+        if self.alpha:
			
 
				+            return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor(
			
 
				+                kernel1x1), bias3x3 + self.alpha * bias1x1
			
 
				+        else:
			
 
				+            return kernel3x3 + self._pad_1x1_to_3x3_tensor(
			
 
				+                kernel1x1), bias3x3 + bias1x1
			
 
				+
			
 
				+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
			
 
				+        if kernel1x1 is None:
			
 
				+            return 0
			
 
				+        else:
			
 
				+            return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
			
 
				+
			
 
				+    def _fuse_bn_tensor(self, branch):
			
 
				+        if branch is None:
			
 
				+            return 0, 0
			
 
				+        kernel = branch.conv.weight
			
 
				+        running_mean = branch.bn._mean
			
 
				+        running_var = branch.bn._variance
			
 
				+        gamma = branch.bn.weight
			
 
				+        beta = branch.bn.bias
			
 
				+        eps = branch.bn._epsilon
			
 
				+        std = (running_var + eps).sqrt()
			
 
				+        t = (gamma / std).reshape((-1, 1, 1, 1))
			
 
				+        return kernel * t, beta - running_mean * gamma / std
			
 
				+
			
 
				+
			
 
				+class BasicBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 act='relu',
			
 
				+                 shortcut=True,
			
 
				+                 use_alpha=False):
			
 
				+        super(BasicBlock, self).__init__()
			
 
				+        assert ch_in == ch_out
			
 
				+        self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=act)
			
 
				+        self.conv2 = RepVggBlock(ch_out, ch_out, act=act, alpha=use_alpha)
			
 
				+        self.shortcut = shortcut
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        y = self.conv1(x)
			
 
				+        y = self.conv2(y)
			
 
				+        if self.shortcut:
			
 
				+            return paddle.add(x, y)
			
 
				+        else:
			
 
				+            return y
			
 
				+
			
 
				+
			
 
				+class EffectiveSELayer(nn.Layer):
			
 
				+    """ Effective Squeeze-Excitation
			
 
				+    From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, channels, act='hardsigmoid'):
			
 
				+        super(EffectiveSELayer, self).__init__()
			
 
				+        self.fc = nn.Conv2D(channels, channels, kernel_size=1, padding=0)
			
 
				+        self.act = get_act_fn(act) if act is None or isinstance(act, (
			
 
				+            str, dict)) else act
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x_se = x.mean((2, 3), keepdim=True)
			
 
				+        x_se = self.fc(x_se)
			
 
				+        return x * self.act(x_se)
			
 
				+
			
 
				+
			
 
				+class CSPResStage(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 block_fn,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 n,
			
 
				+                 stride,
			
 
				+                 act='relu',
			
 
				+                 attn='eca',
			
 
				+                 use_alpha=False):
			
 
				+        super(CSPResStage, self).__init__()
			
 
				+
			
 
				+        ch_mid = (ch_in + ch_out) // 2
			
 
				+        if stride == 2:
			
 
				+            self.conv_down = ConvBNLayer(
			
 
				+                ch_in, ch_mid, 3, stride=2, padding=1, act=act)
			
 
				+        else:
			
 
				+            self.conv_down = None
			
 
				+        self.conv1 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
			
 
				+        self.conv2 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
			
 
				+        self.blocks = nn.Sequential(*[
			
 
				+            block_fn(
			
 
				+                ch_mid // 2,
			
 
				+                ch_mid // 2,
			
 
				+                act=act,
			
 
				+                shortcut=True,
			
 
				+                use_alpha=use_alpha) for i in range(n)
			
 
				+        ])
			
 
				+        if attn:
			
 
				+            self.attn = EffectiveSELayer(ch_mid, act='hardsigmoid')
			
 
				+        else:
			
 
				+            self.attn = None
			
 
				+
			
 
				+        self.conv3 = ConvBNLayer(ch_mid, ch_out, 1, act=act)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        if self.conv_down is not None:
			
 
				+            x = self.conv_down(x)
			
 
				+        y1 = self.conv1(x)
			
 
				+        y2 = self.blocks(self.conv2(x))
			
 
				+        y = paddle.concat([y1, y2], axis=1)
			
 
				+        if self.attn is not None:
			
 
				+            y = self.attn(y)
			
 
				+        y = self.conv3(y)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class CSPResNet(nn.Layer):
			
 
				+    __shared__ = ['width_mult', 'depth_mult', 'trt']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 layers=[3, 6, 6, 3],
			
 
				+                 channels=[64, 128, 256, 512, 1024],
			
 
				+                 act='swish',
			
 
				+                 return_idx=[1, 2, 3],
			
 
				+                 depth_wise=False,
			
 
				+                 use_large_stem=False,
			
 
				+                 width_mult=1.0,
			
 
				+                 depth_mult=1.0,
			
 
				+                 trt=False,
			
 
				+                 use_checkpoint=False,
			
 
				+                 use_alpha=False,
			
 
				+                 **args):
			
 
				+        super(CSPResNet, self).__init__()
			
 
				+        self.use_checkpoint = use_checkpoint
			
 
				+        channels = [max(round(c * width_mult), 1) for c in channels]
			
 
				+        layers = [max(round(l * depth_mult), 1) for l in layers]
			
 
				+        act = get_act_fn(
			
 
				+            act, trt=trt) if act is None or isinstance(act,
			
 
				+                                                       (str, dict)) else act
			
 
				+
			
 
				+        if use_large_stem:
			
 
				+            self.stem = nn.Sequential(
			
 
				+                ('conv1', ConvBNLayer(
			
 
				+                    3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
			
 
				+                ('conv2', ConvBNLayer(
			
 
				+                    channels[0] // 2,
			
 
				+                    channels[0] // 2,
			
 
				+                    3,
			
 
				+                    stride=1,
			
 
				+                    padding=1,
			
 
				+                    act=act)), ('conv3', ConvBNLayer(
			
 
				+                        channels[0] // 2,
			
 
				+                        channels[0],
			
 
				+                        3,
			
 
				+                        stride=1,
			
 
				+                        padding=1,
			
 
				+                        act=act)))
			
 
				+        else:
			
 
				+            self.stem = nn.Sequential(
			
 
				+                ('conv1', ConvBNLayer(
			
 
				+                    3, channels[0] // 2, 3, stride=2, padding=1, act=act)),
			
 
				+                ('conv2', ConvBNLayer(
			
 
				+                    channels[0] // 2,
			
 
				+                    channels[0],
			
 
				+                    3,
			
 
				+                    stride=1,
			
 
				+                    padding=1,
			
 
				+                    act=act)))
			
 
				+
			
 
				+        n = len(channels) - 1
			
 
				+        self.stages = nn.Sequential(*[(str(i), CSPResStage(
			
 
				+            BasicBlock,
			
 
				+            channels[i],
			
 
				+            channels[i + 1],
			
 
				+            layers[i],
			
 
				+            2,
			
 
				+            act=act,
			
 
				+            use_alpha=use_alpha)) for i in range(n)])
			
 
				+
			
 
				+        self._out_channels = channels[1:]
			
 
				+        self._out_strides = [4 * 2**i for i in range(n)]
			
 
				+        self.return_idx = return_idx
			
 
				+        if use_checkpoint:
			
 
				+            paddle.seed(0)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        x = self.stem(x)
			
 
				+        outs = []
			
 
				+        for idx, stage in enumerate(self.stages):
			
 
				+            if self.use_checkpoint and self.training:
			
 
				+                x = paddle.distributed.fleet.utils.recompute(
			
 
				+                    stage, x, **{"preserve_rng_state": True})
			
 
				+            else:
			
 
				+                x = stage(x)
			
 
				+            if idx in self.return_idx:
			
 
				+                outs.append(x)
			
 
				+
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [
			
 
				+            ShapeSpec(
			
 
				+                channels=self._out_channels[i], stride=self._out_strides[i])
			
 
				+            for i in self.return_idx
			
 
				+        ]
			
--- a/paddlers/models/ppdet/modeling/backbones/darknet.py
+++ b/paddlers/models/ppdet/modeling/backbones/darknet.py
@@ -77,8 +77,8 @@ class ConvBNLayer(nn.Layer):
 
				         out = self.batch_norm(out)
			
 
				         if self.act == 'leaky':
			
 
				             out = F.leaky_relu(out, 0.1)
			
 
				-        elif self.act == 'mish':
			
 
				-            out = mish(out)
			
 
				+        else:
			
 
				+            out = getattr(F, self.act)(out)
			
 
				         return out
			
 
				 
			
 
				 
			
@@ -149,9 +149,14 @@ class BasicBlock(nn.Layer):
 
				 
			
 
				         super(BasicBlock, self).__init__()
			
 
				 
			
 
				+        assert ch_in == ch_out and (ch_in % 2) == 0, \
			
 
				+            f"ch_in and ch_out should be the same even int, but the input \'ch_in is {ch_in}, \'ch_out is {ch_out}"
			
 
				+        # example:
			
 
				+        # --------------{conv1} --> {conv2}
			
 
				+        # channel route: 10-->5 --> 5-->10
			
 
				         self.conv1 = ConvBNLayer(
			
 
				             ch_in=ch_in,
			
 
				-            ch_out=ch_out,
			
 
				+            ch_out=int(ch_out / 2),
			
 
				             filter_size=1,
			
 
				             stride=1,
			
 
				             padding=0,
			
@@ -160,8 +165,8 @@ class BasicBlock(nn.Layer):
 
				             freeze_norm=freeze_norm,
			
 
				             data_format=data_format)
			
 
				         self.conv2 = ConvBNLayer(
			
 
				-            ch_in=ch_out,
			
 
				-            ch_out=ch_out * 2,
			
 
				+            ch_in=int(ch_out / 2),
			
 
				+            ch_out=ch_out,
			
 
				             filter_size=3,
			
 
				             stride=1,
			
 
				             padding=1,
			
@@ -215,7 +220,7 @@ class Blocks(nn.Layer):
 
				             res_out = self.add_sublayer(
			
 
				                 block_name,
			
 
				                 BasicBlock(
			
 
				-                    ch_out * 2,
			
 
				+                    ch_out,
			
 
				                     ch_out,
			
 
				                     norm_type=norm_type,
			
 
				                     norm_decay=norm_decay,
			
@@ -296,7 +301,7 @@ class DarkNet(nn.Layer):
 
				                 name,
			
 
				                 Blocks(
			
 
				                     int(ch_in[i]),
			
 
				-                    32 * (2**i),
			
 
				+                    int(ch_in[i]),
			
 
				                     stage,
			
 
				                     norm_type=norm_type,
			
 
				                     norm_decay=norm_decay,
			
@@ -305,14 +310,14 @@ class DarkNet(nn.Layer):
 
				                     name=name))
			
 
				             self.darknet_conv_block_list.append(conv_block)
			
 
				             if i in return_idx:
			
 
				-                self._out_channels.append(64 * (2**i))
			
 
				+                self._out_channels.append(int(ch_in[i]))
			
 
				         for i in range(num_stages - 1):
			
 
				             down_name = 'stage.{}.downsample'.format(i)
			
 
				             downsample = self.add_sublayer(
			
 
				                 down_name,
			
 
				                 DownSample(
			
 
				-                    ch_in=32 * (2**(i + 1)),
			
 
				-                    ch_out=32 * (2**(i + 2)),
			
 
				+                    ch_in=int(ch_in[i]),
			
 
				+                    ch_out=int(ch_in[i + 1]),
			
 
				                     norm_type=norm_type,
			
 
				                     norm_decay=norm_decay,
			
 
				                     freeze_norm=freeze_norm,
			
--- a/paddlers/models/ppdet/modeling/backbones/dla.py
+++ b/paddlers/models/ppdet/modeling/backbones/dla.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import paddle
			
--- a/paddlers/models/ppdet/modeling/backbones/esnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/esnet.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -20,7 +20,7 @@ import paddle
 
				 import paddle.nn as nn
			
 
				 import paddle.nn.functional as F
			
 
				 from paddle import ParamAttr
			
 
				-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
			
 
				+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
			
 
				 from paddle.nn.initializer import KaimingNormal
			
 
				 from paddle.regularizer import L2Decay
			
 
				 
			
--- a/paddlers/models/ppdet/modeling/backbones/ghostnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/ghostnet.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -299,17 +299,17 @@ class GhostBottleneck(nn.Layer):
 
				 class GhostNet(nn.Layer):
			
 
				     __shared__ = ['norm_type']
			
 
				 
			
 
				-    def __init__(self,
			
 
				-                 scale=1.3,
			
 
				-                 feature_maps=[6, 12, 15],
			
 
				-                 with_extra_blocks=False,
			
 
				-                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
			
 
				-                                      [64, 128]],
			
 
				-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				-                 conv_decay=0.,
			
 
				-                 norm_type='bn',
			
 
				-                 norm_decay=0.0,
			
 
				-                 freeze_norm=False):
			
 
				+    def __init__(
			
 
				+            self,
			
 
				+            scale=1.3,
			
 
				+            feature_maps=[6, 12, 15],
			
 
				+            with_extra_blocks=False,
			
 
				+            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
			
 
				+            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				+            conv_decay=0.,
			
 
				+            norm_type='bn',
			
 
				+            norm_decay=0.0,
			
 
				+            freeze_norm=False):
			
 
				         super(GhostNet, self).__init__()
			
 
				         if isinstance(feature_maps, Integral):
			
 
				             feature_maps = [feature_maps]
			
--- a/paddlers/models/ppdet/modeling/backbones/hardnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/hardnet.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -146,7 +146,7 @@ class HarDBlock(nn.Layer):
 
				 class HarDNet(nn.Layer):
			
 
				     def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
			
 
				         super(HarDNet, self).__init__()
			
 
				-        assert arch in [39, 68, 85], "HarDNet-{} not support.".format(arch)
			
 
				+        assert arch in [68, 85], "HarDNet-{} is not supported.".format(arch)
			
 
				         if arch == 85:
			
 
				             first_ch = [48, 96]
			
 
				             second_kernel = 3
			
@@ -161,6 +161,8 @@ class HarDNet(nn.Layer):
 
				             grmul = 1.7
			
 
				             gr = [14, 16, 20, 40]
			
 
				             n_layers = [8, 16, 16, 16]
			
 
				+        else:
			
 
				+            raise ValueError("HarDNet-{} is not supported.".format(arch))
			
 
				 
			
 
				         self.return_idx = return_idx
			
 
				         self._out_channels = [96, 214, 458, 784]
			
--- a/paddlers/models/ppdet/modeling/backbones/lcnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/lcnet.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -68,7 +68,8 @@ class ConvBNLayer(nn.Layer):
 
				                  filter_size,
			
 
				                  num_filters,
			
 
				                  stride,
			
 
				-                 num_groups=1):
			
 
				+                 num_groups=1,
			
 
				+                 act='hard_swish'):
			
 
				         super().__init__()
			
 
				 
			
 
				         self.conv = Conv2D(
			
@@ -85,12 +86,15 @@ class ConvBNLayer(nn.Layer):
 
				             num_filters,
			
 
				             weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
			
 
				             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
			
 
				-        self.hardswish = nn.Hardswish()
			
 
				+        if act == 'hard_swish':
			
 
				+            self.act = nn.Hardswish()
			
 
				+        elif act == 'relu6':
			
 
				+            self.act = nn.ReLU6()
			
 
				 
			
 
				     def forward(self, x):
			
 
				         x = self.conv(x)
			
 
				         x = self.bn(x)
			
 
				-        x = self.hardswish(x)
			
 
				+        x = self.act(x)
			
 
				         return x
			
 
				 
			
 
				 
			
@@ -100,7 +104,8 @@ class DepthwiseSeparable(nn.Layer):
 
				                  num_filters,
			
 
				                  stride,
			
 
				                  dw_size=3,
			
 
				-                 use_se=False):
			
 
				+                 use_se=False,
			
 
				+                 act='hard_swish'):
			
 
				         super().__init__()
			
 
				         self.use_se = use_se
			
 
				         self.dw_conv = ConvBNLayer(
			
@@ -108,14 +113,16 @@ class DepthwiseSeparable(nn.Layer):
 
				             num_filters=num_channels,
			
 
				             filter_size=dw_size,
			
 
				             stride=stride,
			
 
				-            num_groups=num_channels)
			
 
				+            num_groups=num_channels,
			
 
				+            act=act)
			
 
				         if use_se:
			
 
				             self.se = SEModule(num_channels)
			
 
				         self.pw_conv = ConvBNLayer(
			
 
				             num_channels=num_channels,
			
 
				             filter_size=1,
			
 
				             num_filters=num_filters,
			
 
				-            stride=1)
			
 
				+            stride=1,
			
 
				+            act=act)
			
 
				 
			
 
				     def forward(self, x):
			
 
				         x = self.dw_conv(x)
			
@@ -158,7 +165,7 @@ class SEModule(nn.Layer):
 
				 @register
			
 
				 @serializable
			
 
				 class LCNet(nn.Layer):
			
 
				-    def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
			
 
				+    def __init__(self, scale=1.0, feature_maps=[3, 4, 5], act='hard_swish'):
			
 
				         super().__init__()
			
 
				         self.scale = scale
			
 
				         self.feature_maps = feature_maps
			
@@ -169,7 +176,8 @@ class LCNet(nn.Layer):
 
				             num_channels=3,
			
 
				             filter_size=3,
			
 
				             num_filters=make_divisible(16 * scale),
			
 
				-            stride=2)
			
 
				+            stride=2,
			
 
				+            act=act)
			
 
				 
			
 
				         self.blocks2 = nn.Sequential(*[
			
 
				             DepthwiseSeparable(
			
@@ -177,7 +185,8 @@ class LCNet(nn.Layer):
 
				                 num_filters=make_divisible(out_c * scale),
			
 
				                 dw_size=k,
			
 
				                 stride=s,
			
 
				-                use_se=se)
			
 
				+                use_se=se,
			
 
				+                act=act)
			
 
				             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
			
 
				         ])
			
 
				 
			
@@ -187,7 +196,8 @@ class LCNet(nn.Layer):
 
				                 num_filters=make_divisible(out_c * scale),
			
 
				                 dw_size=k,
			
 
				                 stride=s,
			
 
				-                use_se=se)
			
 
				+                use_se=se,
			
 
				+                act=act)
			
 
				             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
			
 
				         ])
			
 
				 
			
@@ -200,7 +210,8 @@ class LCNet(nn.Layer):
 
				                 num_filters=make_divisible(out_c * scale),
			
 
				                 dw_size=k,
			
 
				                 stride=s,
			
 
				-                use_se=se)
			
 
				+                use_se=se,
			
 
				+                act=act)
			
 
				             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
			
 
				         ])
			
 
				 
			
@@ -213,7 +224,8 @@ class LCNet(nn.Layer):
 
				                 num_filters=make_divisible(out_c * scale),
			
 
				                 dw_size=k,
			
 
				                 stride=s,
			
 
				-                use_se=se)
			
 
				+                use_se=se,
			
 
				+                act=act)
			
 
				             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
			
 
				         ])
			
 
				 
			
@@ -226,7 +238,8 @@ class LCNet(nn.Layer):
 
				                 num_filters=make_divisible(out_c * scale),
			
 
				                 dw_size=k,
			
 
				                 stride=s,
			
 
				-                use_se=se)
			
 
				+                use_se=se,
			
 
				+                act=act)
			
 
				             for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
			
 
				         ])
			
 
				 
			
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -282,19 +282,19 @@ class ExtraBlockDW(nn.Layer):
 
				 class MobileNetV3(nn.Layer):
			
 
				     __shared__ = ['norm_type']
			
 
				 
			
 
				-    def __init__(self,
			
 
				-                 scale=1.0,
			
 
				-                 model_name="large",
			
 
				-                 feature_maps=[6, 12, 15],
			
 
				-                 with_extra_blocks=False,
			
 
				-                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
			
 
				-                                      [64, 128]],
			
 
				-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				-                 conv_decay=0.0,
			
 
				-                 multiplier=1.0,
			
 
				-                 norm_type='bn',
			
 
				-                 norm_decay=0.0,
			
 
				-                 freeze_norm=False):
			
 
				+    def __init__(
			
 
				+            self,
			
 
				+            scale=1.0,
			
 
				+            model_name="large",
			
 
				+            feature_maps=[6, 12, 15],
			
 
				+            with_extra_blocks=False,
			
 
				+            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
			
 
				+            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				+            conv_decay=0.0,
			
 
				+            multiplier=1.0,
			
 
				+            norm_type='bn',
			
 
				+            norm_decay=0.0,
			
 
				+            freeze_norm=False):
			
 
				         super(MobileNetV3, self).__init__()
			
 
				         if isinstance(feature_maps, Integral):
			
 
				             feature_maps = [feature_maps]
			
--- a/paddlers/models/ppdet/modeling/backbones/mobileone.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobileone.py
@@ -0,0 +1,266 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+"""
			
 
				+This code is the paddle implementation of MobileOne block, see: https://arxiv.org/pdf/2206.04040.pdf. 
			
 
				+Some codes are based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
			
 
				+The copyright of DingXiaoH/RepVGG is as follows:
			
 
				+MIT License [see LICENSE for details]
			
 
				+"""
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import Normal, Constant
			
 
				+
			
 
				+from paddlers.models.ppdet.modeling.ops import get_act_fn
			
 
				+from paddlers.models.ppdet.modeling.layers import ConvNormLayer
			
 
				+
			
 
				+
			
 
				class MobileOneBlock(nn.Layer):
				    """Re-parameterizable MobileOne block (https://arxiv.org/pdf/2206.04040.pdf).

				    While training, the block is a two-stage multi-branch structure:

				    * Stage 1 sums ``conv_num`` ``kernel_size`` x ``kernel_size``
				      ConvNormLayers built with ``groups=ch_in`` (``depth_conv``), one 1x1
				      ConvNormLayer with ``groups=ch_in`` (``rbr_1x1``) and an optional
				      BatchNorm-only identity branch (``rbr_identity_st1``), then applies
				      the activation.
				    * Stage 2 sums ``conv_num`` 1x1 ConvNormLayers mapping ``ch_in`` to
				      ``ch_out`` (``point_conv``) and an optional BatchNorm-only identity
				      branch (``rbr_identity_st2``), then applies the activation.

				    Both identity branches exist only when ``ch_in == ch_out`` and
				    ``stride == 1``. ``convert_to_deploy`` folds every branch into two plain
				    ``nn.Conv2D`` layers (``conv1`` and ``conv2``); once both exist,
				    ``forward`` uses only them.

				    NOTE(review): the fusion code reads ``_mean``, ``_variance`` and
				    ``_epsilon`` from every norm layer, so it presumably requires a
				    batch-norm style ``norm_type`` -- confirm before using other norms.

				    Args:
				        ch_in (int): Number of input channels.
				        ch_out (int): Number of output channels.
				        stride (int): Stride of the stage-1 convolutions.
				        kernel_size (int): Kernel size of the ``depth_conv`` branches.
				        conv_num (int): Number of parallel branches in ``depth_conv`` and
				            in ``point_conv``.
				        norm_type, norm_decay, norm_groups, bias_on, lr_scale, freeze_norm,
				        initializer, skip_quant: Forwarded unchanged to every ConvNormLayer.
				        act: Activation spec. ``None``, ``str`` and ``dict`` values are
				            resolved with ``get_act_fn``; any other value is stored and
				            called as-is.
				    """

				    def __init__(
				            self,
				            ch_in,
				            ch_out,
				            stride,
				            kernel_size,
				            conv_num=1,
				            norm_type='bn',
				            norm_decay=0.,
				            norm_groups=32,
				            bias_on=False,
				            lr_scale=1.,
				            freeze_norm=False,
				            initializer=Normal(
				                mean=0., std=0.01),
				            skip_quant=False,
				            act='relu', ):
				        super(MobileOneBlock, self).__init__()

				        self.ch_in = ch_in
				        self.ch_out = ch_out
				        self.kernel_size = kernel_size
				        self.stride = stride
				        self.padding = (kernel_size - 1) // 2
				        self.k = conv_num

				        # Options shared by every ConvNormLayer of this block.
				        norm_cfg = dict(
				            norm_type=norm_type,
				            norm_decay=norm_decay,
				            norm_groups=norm_groups,
				            bias_on=bias_on,
				            lr_scale=lr_scale,
				            freeze_norm=freeze_norm,
				            initializer=initializer,
				            skip_quant=skip_quant)

				        self.depth_conv = nn.LayerList()
				        self.point_conv = nn.LayerList()
				        # The two lists are filled alternately, exactly as before, so the
				        # order in which sub-layers are created does not change.
				        for _ in range(self.k):
				            self.depth_conv.append(
				                ConvNormLayer(
				                    ch_in,
				                    ch_in,
				                    kernel_size,
				                    stride=stride,
				                    groups=ch_in,
				                    **norm_cfg))
				            self.point_conv.append(
				                ConvNormLayer(
				                    ch_in, ch_out, 1, stride=1, groups=1, **norm_cfg))
				        self.rbr_1x1 = ConvNormLayer(
				            ch_in, ch_in, 1, stride=self.stride, groups=ch_in, **norm_cfg)

				        # An identity (BN-only) shortcut is only shape-compatible when the
				        # block changes neither the channel count nor the resolution.
				        has_identity = ch_in == ch_out and self.stride == 1
				        self.rbr_identity_st1 = self._identity_bn(
				            ch_in) if has_identity else None
				        self.rbr_identity_st2 = self._identity_bn(
				            ch_out) if has_identity else None
				        self.act = get_act_fn(act) if act is None or isinstance(act, (
				            str, dict)) else act

				    @staticmethod
				    def _identity_bn(num_features):
				        """Build the BatchNorm2D used as an identity branch (no L2 decay)."""
				        return nn.BatchNorm2D(
				            num_features=num_features,
				            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
				            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))

				    def forward(self, x):
				        """Run the fused path if ``conv1``/``conv2`` exist, else all branches."""
				        if hasattr(self, "conv1") and hasattr(self, "conv2"):
				            return self.act(self.conv2(self.act(self.conv1(x))))

				        # Stage 1: sum of the depth_conv branches, the 1x1 branch and the
				        # optional identity branch. A missing branch contributes 0.
				        if self.rbr_identity_st1 is None:
				            id_out_st1 = 0
				        else:
				            id_out_st1 = self.rbr_identity_st1(x)
				        x1_1 = sum(conv(x) for conv in self.depth_conv)
				        x1_2 = self.rbr_1x1(x)
				        x1 = self.act(x1_1 + x1_2 + id_out_st1)

				        # Stage 2: sum of the point_conv branches and the optional identity.
				        if self.rbr_identity_st2 is None:
				            id_out_st2 = 0
				        else:
				            id_out_st2 = self.rbr_identity_st2(x1)
				        x2_1 = sum(conv(x1) for conv in self.point_conv)
				        return self.act(x2_1 + id_out_st2)

				    def convert_to_deploy(self):
				        """Fold all training branches into ``conv1`` and ``conv2`` in place.

				        After the call the training branches are removed from the layer.
				        Calling the method again is a no-op (it used to raise
				        AttributeError because the branches it reads were already gone).
				        """
				        if not hasattr(self, 'depth_conv'):
				            # Training branches were already fused and deleted.
				            return

				        if not hasattr(self, 'conv1'):
				            self.conv1 = nn.Conv2D(
				                in_channels=self.ch_in,
				                out_channels=self.ch_in,
				                kernel_size=self.kernel_size,
				                stride=self.stride,
				                padding=self.padding,
				                groups=self.ch_in,
				                bias_attr=ParamAttr(
				                    initializer=Constant(value=0.), learning_rate=1.))
				        if not hasattr(self, 'conv2'):
				            self.conv2 = nn.Conv2D(
				                in_channels=self.ch_in,
				                out_channels=self.ch_out,
				                kernel_size=1,
				                stride=1,
				                padding='SAME',
				                groups=1,
				                bias_attr=ParamAttr(
				                    initializer=Constant(value=0.), learning_rate=1.))

				        (conv1_kernel, conv1_bias, conv2_kernel,
				         conv2_bias) = self.get_equivalent_kernel_bias()
				        self.conv1.weight.set_value(conv1_kernel)
				        self.conv1.bias.set_value(conv1_bias)
				        self.conv2.weight.set_value(conv2_kernel)
				        self.conv2.bias.set_value(conv2_bias)

				        for name in ('depth_conv', 'point_conv', 'rbr_1x1'):
				            delattr(self, name)
				        for name in ('rbr_identity_st1', 'rbr_identity_st2'):
				            if hasattr(self, name):
				                delattr(self, name)

				    def get_equivalent_kernel_bias(self):
				        """Return ``(conv1_kernel, conv1_bias, conv2_kernel, conv2_bias)``.

				        Each value is the sum of the BN-folded kernels (or biases) of all
				        branches of the corresponding stage.
				        """
				        # Stage 1 branches all use groups=ch_in, so its identity kernel must
				        # be depthwise whatever the kernel size is.
				        st1_kernel3x3, st1_bias3x3 = self._fuse_bn_tensor(self.depth_conv)
				        st1_kernel1x1, st1_bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
				        st1_kernelid, st1_biasid = self._fuse_bn_tensor(
				            self.rbr_identity_st1,
				            kernel_size=self.kernel_size,
				            depthwise=True)

				        # Stage 2 branches are dense 1x1 convolutions.
				        st2_kernel1x1, st2_bias1x1 = self._fuse_bn_tensor(self.point_conv)
				        st2_kernelid, st2_biasid = self._fuse_bn_tensor(
				            self.rbr_identity_st2, kernel_size=1, depthwise=False)

				        conv1_kernel = st1_kernel3x3 + self._pad_1x1_to_3x3_tensor(
				            st1_kernel1x1) + st1_kernelid
				        conv1_bias = st1_bias3x3 + st1_bias1x1 + st1_biasid

				        conv2_kernel = st2_kernel1x1 + st2_kernelid
				        conv2_bias = st2_bias1x1 + st2_biasid

				        return conv1_kernel, conv1_bias, conv2_kernel, conv2_bias

				    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
				        """Zero-pad a 1x1 kernel on its last two axes up to ``kernel_size``.

				        Returns 0 when ``kernel1x1`` is None.
				        """
				        if kernel1x1 is None:
				            return 0
				        padding_size = (self.kernel_size - 1) // 2
				        return nn.functional.pad(
				            kernel1x1,
				            [padding_size, padding_size, padding_size, padding_size])

				    @staticmethod
				    def _fold_bn(kernel, norm):
				        """Fold the statistics and affine terms of ``norm`` into ``kernel``.

				        Returns ``(kernel * gamma / std, beta - mean * gamma / std)`` with
				        ``std = sqrt(variance + eps)``, the scale being applied along the
				        first axis of ``kernel``.
				        """
				        std = (norm._variance + norm._epsilon).sqrt()
				        t = (norm.weight / std).reshape((-1, 1, 1, 1))
				        return kernel * t, norm.bias - norm._mean * norm.weight / std

				    def _fuse_bn_tensor(self, branch, kernel_size=3, depthwise=None):
				        """Return the BN-folded ``(kernel, bias)`` equivalent to ``branch``.

				        Args:
				            branch: ``None`` (contributes ``(0, 0)``), an ``nn.LayerList`` of
				                ConvNormLayers (their folded kernels and biases are summed),
				                a single ConvNormLayer, or an ``nn.BatchNorm2D`` identity
				                branch.
				            kernel_size (int): Spatial size of the identity kernel; only
				                used for a BatchNorm2D branch.
				            depthwise (bool|None): Only used for a BatchNorm2D branch.
				                True builds a ``[ch_in, 1, k, k]`` identity kernel, False a
				                ``[ch_in, ch_in, k, k]`` one. None keeps the historical rule
				                (dense only when ``kernel_size == 1``), which picks the
				                wrong shape for a depthwise stage with a 1x1 kernel.

				        Raises:
				            ValueError: If ``kernel_size`` is smaller than 1 for an identity
				                branch.
				        """
				        if branch is None:
				            return 0, 0

				        if isinstance(branch, nn.LayerList):
				            folded = [
				                self._fold_bn(block.conv.weight, block.norm)
				                for block in branch
				            ]
				            return (sum(kernel for kernel, _ in folded),
				                    sum(bias for _, bias in folded))

				        if isinstance(branch, ConvNormLayer):
				            return self._fold_bn(branch.conv.weight, branch.norm)

				        assert isinstance(branch, nn.BatchNorm2D)
				        # Validate before allocating anything.
				        if kernel_size < 1:
				            raise ValueError("Invalid kernel size recieved!")
				        if depthwise is None:
				            depthwise = kernel_size != 1
				        input_dim = 1 if depthwise else self.ch_in
				        kernel_value = paddle.zeros(
				            shape=[self.ch_in, input_dim, kernel_size, kernel_size],
				            dtype='float32')
				        # A single 1 at the spatial centre makes channel i map onto itself.
				        center = (kernel_size - 1) // 2
				        for i in range(self.ch_in):
				            kernel_value[i, i % input_dim, center, center] = 1
				        kernel = paddle.to_tensor(kernel_value, place=branch.weight.place)
				        return self._fold_bn(kernel, branch)
			
--- a/paddlers/models/ppdet/modeling/backbones/resnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/resnet.py
@@ -1,15 +1,15 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import math
			
@@ -446,13 +446,13 @@ class ResNet(nn.Layer):
 
				                  std_senet=False):
			
 
				         """
			
 
				         Residual Network, see https://arxiv.org/abs/1512.03385
			
 
				-
			
 
				+        
			
 
				         Args:
			
 
				             depth (int): ResNet depth, should be 18, 34, 50, 101, 152.
			
 
				             ch_in (int): output channel of first stage, default 64
			
 
				             variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
			
 
				             lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
			
 
				-                                 lower learning rate ratio is need for pretrained model
			
 
				+                                 lower learning rate ratio is need for pretrained model 
			
 
				                                  got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
			
 
				             groups (int): group convolution cardinality
			
 
				             base_width (int): base width of each group convolution
			
--- a/paddlers/models/ppdet/modeling/backbones/senet.py
+++ b/paddlers/models/ppdet/modeling/backbones/senet.py
@@ -1,21 +1,23 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
			
 
				+#   
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");   
			
 
				+# you may not use this file except in compliance with the License.  
			
 
				+# You may obtain a copy of the License at   
			
 
				+#   
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0    
			
 
				+#   
			
 
				+# Unless required by applicable law or agreed to in writing, software   
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
			
 
				+# See the License for the specific language governing permissions and   
			
 
				 # limitations under the License.
			
 
				 
			
 
				 import paddle.nn as nn
			
 
				 
			
 
				 from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				 from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from .name_adapter import NameAdapter
			
 
				 
			
 
				 __all__ = ['SENet', 'SERes5Head']
			
 
				 
			
@@ -41,12 +43,12 @@ class SENet(ResNet):
 
				                  num_stages=4):
			
 
				         """
			
 
				         Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507
			
 
				-
			
 
				+        
			
 
				         Args:
			
 
				             depth (int): SENet depth, should be 50, 101, 152
			
 
				             variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
			
 
				             lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
			
 
				-                                 lower learning rate ratio is need for pretrained model
			
 
				+                                 lower learning rate ratio is need for pretrained model 
			
 
				                                  got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
			
 
				             groups (int): group convolution cardinality
			
 
				             base_width (int): base width of each group convolution
			
@@ -103,7 +105,7 @@ class SERes5Head(nn.Layer):
 
				             norm_decay (float): weight decay for normalization layer weights
			
 
				             dcn_v2_stages (list): index of stages who select deformable conv v2
			
 
				             std_senet (bool): whether use senet, default True
			
 
				-
			
 
				+            
			
 
				         """
			
 
				         super(SERes5Head, self).__init__()
			
 
				         ch_out = 512
			
--- a/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
+++ b/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
@@ -1,4 +1,4 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
			
 
				+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
 
				 # you may not use this file except in compliance with the License.
			
@@ -188,11 +188,10 @@ class ShuffleNetV2(nn.Layer):
 
				         elif scale == 1.5:
			
 
				             stage_out_channels = [-1, 24, 176, 352, 704, 1024]
			
 
				         elif scale == 2.0:
			
 
				-            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
			
 
				+            stage_out_channels = [-1, 24, 244, 488, 976, 2048]
			
 
				         else:
			
 
				             raise NotImplementedError("This scale size:[" + str(scale) +
			
 
				                                       "] is not implemented!")
			
 
				-
			
 
				         self._out_channels = []
			
 
				         self._feature_idx = 0
			
 
				         # 1. conv1
			
--- a/paddlers/models/ppdet/modeling/backbones/swin_transformer.py
+++ b/paddlers/models/ppdet/modeling/backbones/swin_transformer.py
@@ -20,62 +20,13 @@ MIT License [see LICENSE for details]
 
				 import paddle
			
 
				 import paddle.nn as nn
			
 
				 import paddle.nn.functional as F
			
 
				-from paddle.nn.initializer import TruncatedNormal, Constant, Assign
			
 
				 from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
			
 
				 from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				 import numpy as np
			
 
				 
			
 
				-# Common initializations
			
 
				-ones_ = Constant(value=1.)
			
 
				-zeros_ = Constant(value=0.)
			
 
				-trunc_normal_ = TruncatedNormal(std=.02)
			
 
				-
			
 
				-
			
 
				-# Common Functions
			
 
				-def to_2tuple(x):
			
 
				-    return tuple([x] * 2)
			
 
				-
			
 
				-
			
 
				-def add_parameter(layer, datas, name=None):
			
 
				-    parameter = layer.create_parameter(
			
 
				-        shape=(datas.shape), default_initializer=Assign(datas))
			
 
				-    if name:
			
 
				-        layer.add_parameter(name, parameter)
			
 
				-    return parameter
			
 
				-
			
 
				-
			
 
				-# Common Layers
			
 
				-def drop_path(x, drop_prob=0., training=False):
			
 
				-    """
			
 
				-        Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
			
 
				-        the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
			
 
				-        See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
			
 
				-    """
			
 
				-    if drop_prob == 0. or not training:
			
 
				-        return x
			
 
				-    keep_prob = paddle.to_tensor(1 - drop_prob)
			
 
				-    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
			
 
				-    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
			
 
				-    random_tensor = paddle.floor(random_tensor)  # binarize
			
 
				-    output = x.divide(keep_prob) * random_tensor
			
 
				-    return output
			
 
				-
			
 
				-
			
 
				-class DropPath(nn.Layer):
			
 
				-    def __init__(self, drop_prob=None):
			
 
				-        super(DropPath, self).__init__()
			
 
				-        self.drop_prob = drop_prob
			
 
				-
			
 
				-    def forward(self, x):
			
 
				-        return drop_path(x, self.drop_prob, self.training)
			
 
				-
			
 
				-
			
 
				-class Identity(nn.Layer):
			
 
				-    def __init__(self):
			
 
				-        super(Identity, self).__init__()
			
 
				-
			
 
				-    def forward(self, input):
			
 
				-        return input
			
 
				+from .transformer_utils import DropPath, Identity
			
 
				+from .transformer_utils import add_parameter, to_2tuple
			
 
				+from .transformer_utils import ones_, zeros_, trunc_normal_
			
 
				 
			
 
				 
			
 
				 class Mlp(nn.Layer):
			
@@ -112,7 +63,7 @@ def window_partition(x, window_size):
 
				     """
			
 
				     B, H, W, C = x.shape
			
 
				     x = x.reshape(
			
 
				-        [B, H // window_size, window_size, W // window_size, window_size, C])
			
 
				+        [-1, H // window_size, window_size, W // window_size, window_size, C])
			
 
				     windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
			
 
				         [-1, window_size, window_size, C])
			
 
				     return windows
			
@@ -128,10 +79,11 @@ def window_reverse(windows, window_size, H, W):
 
				     Returns:
			
 
				         x: (B, H, W, C)
			
 
				     """
			
 
				+    _, _, _, C = windows.shape
			
 
				     B = int(windows.shape[0] / (H * W / window_size / window_size))
			
 
				     x = windows.reshape(
			
 
				-        [B, H // window_size, W // window_size, window_size, window_size, -1])
			
 
				-    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1])
			
 
				+        [-1, H // window_size, W // window_size, window_size, window_size, C])
			
 
				+    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
			
 
				     return x
			
 
				 
			
 
				 
			
@@ -206,14 +158,14 @@ class WindowAttention(nn.Layer):
 
				         """
			
 
				         B_, N, C = x.shape
			
 
				         qkv = self.qkv(x).reshape(
			
 
				-            [B_, N, 3, self.num_heads, C // self.num_heads]).transpose(
			
 
				+            [-1, N, 3, self.num_heads, C // self.num_heads]).transpose(
			
 
				                 [2, 0, 3, 1, 4])
			
 
				         q, k, v = qkv[0], qkv[1], qkv[2]
			
 
				 
			
 
				         q = q * self.scale
			
 
				         attn = paddle.mm(q, k.transpose([0, 1, 3, 2]))
			
 
				 
			
 
				-        index = self.relative_position_index.reshape([-1])
			
 
				+        index = self.relative_position_index.flatten()
			
 
				 
			
 
				         relative_position_bias = paddle.index_select(
			
 
				             self.relative_position_bias_table, index)
			
@@ -227,7 +179,7 @@ class WindowAttention(nn.Layer):
 
				 
			
 
				         if mask is not None:
			
 
				             nW = mask.shape[0]
			
 
				-            attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N
			
 
				+            attn = attn.reshape([-1, nW, self.num_heads, N, N
			
 
				                                  ]) + mask.unsqueeze(1).unsqueeze(0)
			
 
				             attn = attn.reshape([-1, self.num_heads, N, N])
			
 
				             attn = self.softmax(attn)
			
@@ -237,7 +189,7 @@ class WindowAttention(nn.Layer):
 
				         attn = self.attn_drop(attn)
			
 
				 
			
 
				         # x = (attn @ v).transpose(1, 2).reshape([B_, N, C])
			
 
				-        x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([B_, N, C])
			
 
				+        x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([-1, N, C])
			
 
				         x = self.proj(x)
			
 
				         x = self.proj_drop(x)
			
 
				         return x
			
@@ -315,7 +267,7 @@ class SwinTransformerBlock(nn.Layer):
 
				 
			
 
				         shortcut = x
			
 
				         x = self.norm1(x)
			
 
				-        x = x.reshape([B, H, W, C])
			
 
				+        x = x.reshape([-1, H, W, C])
			
 
				 
			
 
				         # pad feature maps to multiples of window size
			
 
				         pad_l = pad_t = 0
			
@@ -337,7 +289,7 @@ class SwinTransformerBlock(nn.Layer):
 
				         x_windows = window_partition(
			
 
				             shifted_x, self.window_size)  # nW*B, window_size, window_size, C
			
 
				         x_windows = x_windows.reshape(
			
 
				-            [-1, self.window_size * self.window_size,
			
 
				+            [x_windows.shape[0], self.window_size * self.window_size,
			
 
				              C])  # nW*B, window_size*window_size, C
			
 
				 
			
 
				         # W-MSA/SW-MSA
			
@@ -346,7 +298,7 @@ class SwinTransformerBlock(nn.Layer):
 
				 
			
 
				         # merge windows
			
 
				         attn_windows = attn_windows.reshape(
			
 
				-            [-1, self.window_size, self.window_size, C])
			
 
				+            [x_windows.shape[0], self.window_size, self.window_size, C])
			
 
				         shifted_x = window_reverse(attn_windows, self.window_size, Hp,
			
 
				                                    Wp)  # B H' W' C
			
 
				 
			
@@ -362,7 +314,7 @@ class SwinTransformerBlock(nn.Layer):
 
				         if pad_r > 0 or pad_b > 0:
			
 
				             x = x[:, :H, :W, :]
			
 
				 
			
 
				-        x = x.reshape([B, H * W, C])
			
 
				+        x = x.reshape([-1, H * W, C])
			
 
				 
			
 
				         # FFN
			
 
				         x = shortcut + self.drop_path(x)
			
@@ -393,7 +345,7 @@ class PatchMerging(nn.Layer):
 
				         B, L, C = x.shape
			
 
				         assert L == H * W, "input feature has wrong size"
			
 
				 
			
 
				-        x = x.reshape([B, H, W, C])
			
 
				+        x = x.reshape([-1, H, W, C])
			
 
				 
			
 
				         # padding
			
 
				         pad_input = (H % 2 == 1) or (W % 2 == 1)
			
@@ -405,7 +357,7 @@ class PatchMerging(nn.Layer):
 
				         x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
			
 
				         x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
			
 
				         x = paddle.concat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
			
 
				-        x = x.reshape([B, H * W // 4, 4 * C])  # B H/2*W/2 4*C
			
 
				+        x = x.reshape([-1, H * W // 4, 4 * C])  # B H/2*W/2 4*C
			
 
				 
			
 
				         x = self.norm(x)
			
 
				         x = self.reduction(x)
			
@@ -482,8 +434,7 @@ class BasicLayer(nn.Layer):
 
				         # calculate attention mask for SW-MSA
			
 
				         Hp = int(np.ceil(H / self.window_size)) * self.window_size
			
 
				         Wp = int(np.ceil(W / self.window_size)) * self.window_size
			
 
				-        img_mask = paddle.fluid.layers.zeros(
			
 
				-            [1, Hp, Wp, 1], dtype='float32')  # 1 Hp Wp 1
			
 
				+        img_mask = paddle.zeros([1, Hp, Wp, 1], dtype='float32')  # 1 Hp Wp 1
			
 
				         h_slices = (slice(0, -self.window_size),
			
 
				                     slice(-self.window_size, -self.shift_size),
			
 
				                     slice(-self.shift_size, None))
			
@@ -688,10 +639,10 @@ class SwinTransformer(nn.Layer):
 
				         if self.frozen_stages >= 0:
			
 
				             self.patch_embed.eval()
			
 
				             for param in self.patch_embed.parameters():
			
 
				-                param.requires_grad = False
			
 
				+                param.stop_gradient = True
			
 
				 
			
 
				         if self.frozen_stages >= 1 and self.ape:
			
 
				-            self.absolute_pos_embed.requires_grad = False
			
 
				+            self.absolute_pos_embed.stop_gradient = True
			
 
				 
			
 
				         if self.frozen_stages >= 2:
			
 
				             self.pos_drop.eval()
			
@@ -699,7 +650,7 @@ class SwinTransformer(nn.Layer):
 
				                 m = self.layers[i]
			
 
				                 m.eval()
			
 
				                 for param in m.parameters():
			
 
				-                    param.requires_grad = False
			
 
				+                    param.stop_gradient = True
			
 
				 
			
 
				     def _init_weights(self, m):
			
 
				         if isinstance(m, nn.Linear):
			
@@ -713,7 +664,7 @@ class SwinTransformer(nn.Layer):
 
				     def forward(self, x):
			
 
				         """Forward function."""
			
 
				         x = self.patch_embed(x['image'])
			
 
				-        _, _, Wh, Ww = x.shape
			
 
				+        B, _, Wh, Ww = x.shape
			
 
				         if self.ape:
			
 
				             # interpolate the position embedding to the corresponding size
			
 
				             absolute_pos_embed = F.interpolate(
			
--- a/paddlers/models/ppdet/modeling/backbones/transformer_utils.py
+++ b/paddlers/models/ppdet/modeling/backbones/transformer_utils.py
@@ -0,0 +1,74 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+
			
 
				+from paddle.nn.initializer import TruncatedNormal, Constant, Assign
			
 
				+
			
 
				+# Common initializations
			
 
				+ones_ = Constant(value=1.)
			
 
				+zeros_ = Constant(value=0.)
			
 
				+trunc_normal_ = TruncatedNormal(std=.02)
			
 
				+
			
 
				+
			
 
				+# Common Layers
			
 
				+def drop_path(x, drop_prob=0., training=False):
			
 
				+    """
			
 
				+        Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
			
 
				+        the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
			
 
				+        See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
			
 
				+    """
			
 
				+    if drop_prob == 0. or not training:
			
 
				+        return x
			
 
				+    keep_prob = paddle.to_tensor(1 - drop_prob)
			
 
				+    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
			
 
				+    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
			
 
				+    random_tensor = paddle.floor(random_tensor)  # binarize
			
 
				+    output = x.divide(keep_prob) * random_tensor
			
 
				+    return output
			
 
				+
			
 
				+
			
 
				+class DropPath(nn.Layer):
			
 
				+    def __init__(self, drop_prob=None):
			
 
				+        super(DropPath, self).__init__()
			
 
				+        self.drop_prob = drop_prob
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        return drop_path(x, self.drop_prob, self.training)
			
 
				+
			
 
				+
			
 
				+class Identity(nn.Layer):
			
 
				+    def __init__(self):
			
 
				+        super(Identity, self).__init__()
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        return input
			
 
				+
			
 
				+
			
 
				+# common funcs
			
 
				+
			
 
				+
			
 
				+def to_2tuple(x):
			
 
				+    if isinstance(x, (list, tuple)):
			
 
				+        return x
			
 
				+    return tuple([x] * 2)
			
 
				+
			
 
				+
			
 
				+def add_parameter(layer, datas, name=None):
			
 
				+    parameter = layer.create_parameter(
			
 
				+        shape=(datas.shape), default_initializer=Assign(datas))
			
 
				+    if name:
			
 
				+        layer.add_parameter(name, parameter)
			
 
				+    return parameter