
【Hackathon + No.150】Add Rotated Box Detection Functionality (#100)

Asthestarsfalll 2 years ago
parent
commit
c6f15f726f
100 changed files with 8559 additions and 909 deletions
  1. 3 1
      docs/apis/data_cn.md
  2. 3 1
      docs/apis/data_en.md
  3. 3 1
      docs/apis/train_cn.md
  4. 3 1
      docs/apis/train_en.md
  5. 3 2
      docs/intro/data_prep_cn.md
  6. 3 2
      docs/intro/data_prep_en.md
  7. 1 0
      docs/intro/model_cons_params_cn.md
  8. 1 0
      docs/intro/model_cons_params_en.md
  9. 1 0
      docs/intro/model_zoo_cn.md
  10. 1 0
      docs/intro/model_zoo_en.md
  11. 9 0
      docs/quick_start_cn.md
  12. 9 0
      docs/quick_start_en.md
  13. 31 5
      paddlers/datasets/base.py
  14. 3 2
      paddlers/datasets/cd_dataset.py
  15. 4 2
      paddlers/datasets/clas_dataset.py
  16. 49 29
      paddlers/datasets/coco.py
  17. 3 2
      paddlers/datasets/res_dataset.py
  18. 3 2
      paddlers/datasets/seg_dataset.py
  19. 5 2
      paddlers/datasets/voc.py
  20. 9 0
      paddlers/models/ppdet/core/workspace.py
  21. 1 1
      paddlers/models/ppdet/data/crop_utils/__init__.py
  22. 91 53
      paddlers/models/ppdet/data/crop_utils/annotation_cropper.py
  23. 10 6
      paddlers/models/ppdet/data/crop_utils/chip_box_utils.py
  24. 309 0
      paddlers/models/ppdet/data/reader.py
  25. 1 0
      paddlers/models/ppdet/data/source/__init__.py
  26. 3 0
      paddlers/models/ppdet/data/source/category.py
  27. 237 3
      paddlers/models/ppdet/data/source/coco.py
  28. 9 1
      paddlers/models/ppdet/data/source/dataset.py
  29. 84 29
      paddlers/models/ppdet/data/source/keypoint_coco.py
  30. 380 0
      paddlers/models/ppdet/data/source/pose3d_cmb.py
  31. 2 0
      paddlers/models/ppdet/data/transform/__init__.py
  32. 159 7
      paddlers/models/ppdet/data/transform/atss_assigner.py
  33. 359 42
      paddlers/models/ppdet/data/transform/batch_operators.py
  34. 832 85
      paddlers/models/ppdet/data/transform/keypoint_operators.py
  35. 296 0
      paddlers/models/ppdet/data/transform/keypoints_3d_operators.py
  36. 500 71
      paddlers/models/ppdet/data/transform/operators.py
  37. 7 0
      paddlers/models/ppdet/engine/__init__.py
  38. 111 47
      paddlers/models/ppdet/engine/callbacks.py
  39. 54 6
      paddlers/models/ppdet/engine/export_utils.py
  40. 107 10
      paddlers/models/ppdet/engine/tracker.py
  41. 147 30
      paddlers/models/ppdet/engine/trainer.py
  42. 42 0
      paddlers/models/ppdet/engine/trainer_cot.py
  43. 475 0
      paddlers/models/ppdet/engine/trainer_ssod.py
  44. 18 17
      paddlers/models/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc
  45. 9 14
      paddlers/models/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu
  46. 121 0
      paddlers/models/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc
  47. 96 0
      paddlers/models/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu
  48. 95 0
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc
  49. 6 11
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu
  50. 0 97
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc
  51. 12 4
      paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h
  52. 1 1
      paddlers/models/ppdet/hash.txt
  53. 2 1
      paddlers/models/ppdet/metrics/__init__.py
  54. 6 2
      paddlers/models/ppdet/metrics/coco_utils.py
  55. 16 0
      paddlers/models/ppdet/metrics/json_results.py
  56. 1 1
      paddlers/models/ppdet/metrics/metrics.py
  57. 200 0
      paddlers/models/ppdet/metrics/pose3d_metrics.py
  58. 2 0
      paddlers/models/ppdet/modeling/__init__.py
  59. 11 0
      paddlers/models/ppdet/modeling/architectures/__init__.py
  60. 35 9
      paddlers/models/ppdet/modeling/architectures/blazeface.py
  61. 1 1
      paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
  62. 11 16
      paddlers/models/ppdet/modeling/architectures/centernet.py
  63. 176 0
      paddlers/models/ppdet/modeling/architectures/centertrack.py
  64. 13 5
      paddlers/models/ppdet/modeling/architectures/detr.py
  65. 61 5
      paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
  66. 30 39
      paddlers/models/ppdet/modeling/architectures/fcos.py
  67. 207 6
      paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
  68. 217 0
      paddlers/models/ppdet/modeling/architectures/keypoint_petr.py
  69. 22 5
      paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
  70. 2 1
      paddlers/models/ppdet/modeling/architectures/meta_arch.py
  71. 114 0
      paddlers/models/ppdet/modeling/architectures/pose3d_metro.py
  72. 260 0
      paddlers/models/ppdet/modeling/architectures/ppyoloe.py
  73. 104 0
      paddlers/models/ppdet/modeling/architectures/queryinst.py
  74. 18 2
      paddlers/models/ppdet/modeling/architectures/retinanet.py
  75. 3 3
      paddlers/models/ppdet/modeling/architectures/sparse_rcnn.py
  76. 35 9
      paddlers/models/ppdet/modeling/architectures/ssd.py
  77. 28 5
      paddlers/models/ppdet/modeling/architectures/yolo.py
  78. 88 0
      paddlers/models/ppdet/modeling/architectures/yolof.py
  79. 10 0
      paddlers/models/ppdet/modeling/assigners/__init__.py
  80. 16 6
      paddlers/models/ppdet/modeling/assigners/atss_assigner.py
  81. 227 0
      paddlers/models/ppdet/modeling/assigners/fcosr_assigner.py
  82. 316 0
      paddlers/models/ppdet/modeling/assigners/hungarian_assigner.py
  83. 275 0
      paddlers/models/ppdet/modeling/assigners/pose_utils.py
  84. 164 0
      paddlers/models/ppdet/modeling/assigners/rotated_task_aligned_assigner.py
  85. 1 1
      paddlers/models/ppdet/modeling/assigners/simota_assigner.py
  86. 38 4
      paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
  87. 182 0
      paddlers/models/ppdet/modeling/assigners/task_aligned_assigner_cr.py
  88. 93 0
      paddlers/models/ppdet/modeling/assigners/uniform_assigner.py
  89. 8 3
      paddlers/models/ppdet/modeling/assigners/utils.py
  90. 2 0
      paddlers/models/ppdet/modeling/backbones/__init__.py
  91. 49 9
      paddlers/models/ppdet/modeling/backbones/dla.py
  92. 144 2
      paddlers/models/ppdet/modeling/backbones/hrnet.py
  93. 5 0
      paddlers/models/ppdet/modeling/backbones/lite_hrnet.py
  94. 30 30
      paddlers/models/ppdet/modeling/backbones/resnet.py
  95. 381 0
      paddlers/models/ppdet/modeling/backbones/trans_encoder.py
  96. 29 11
      paddlers/models/ppdet/modeling/backbones/vision_transformer.py
  97. 99 94
      paddlers/models/ppdet/modeling/bbox_utils.py
  98. 13 0
      paddlers/models/ppdet/modeling/heads/__init__.py
  99. 51 12
      paddlers/models/ppdet/modeling/heads/bbox_head.py
  100. 42 40
      paddlers/models/ppdet/modeling/heads/centernet_head.py

+ 3 - 1
docs/apis/data_cn.md

@@ -57,13 +57,14 @@
 |-------|----|--------|-----|
 |`data_dir`|`str`|数据集存放目录。||
 |`image_dir`|`str`|输入图像存放目录。||
-|`ann_path`|`str`|[COCO格式](https://cocodataset.org/#home)标注文件路径。||
+|`anno_path`|`str`|[COCO格式](https://cocodataset.org/#home)标注文件路径。||
 |`transforms`|`paddlers.transforms.Compose`|对输入数据应用的数据变换算子。||
 |`label_list`|`str` \| `None`|label list文件。label list是一个文本文件,其中每一行包含一个类别的名称。|`None`|
 |`num_workers`|`int` \| `str`|加载数据时使用的辅助进程数。若设置为`'auto'`,则按照如下规则确定使用进程数:当CPU核心数大于16时,使用8个数据读取辅助进程;否则,使用CPU核心数一半数量的辅助进程。|`'auto'`|
 |`shuffle`|`bool`|是否随机打乱数据集中的样本。|`False`|
 |`allow_empty`|`bool`|是否向数据集中添加负样本。|`False`|
 |`empty_ratio`|`float`|负样本占比,仅当`allow_empty`为`True`时生效。若`empty_ratio`为负值或大于等于1,则保留所有生成的负样本。|`1.0`|
+|`batch_transforms`|`paddlers.transforms.BatchCompose`|对输入数据应用的批数据变换算子。||

 ### VOC格式目标检测数据集`VOCDetDataset`

@@ -81,6 +82,7 @@
 |`shuffle`|`bool`|是否随机打乱数据集中的样本。|`False`|
 |`allow_empty`|`bool`|是否向数据集中添加负样本。|`False`|
 |`empty_ratio`|`float`|负样本占比,仅当`allow_empty`为`True`时生效。若`empty_ratio`为负值或大于等于1,则保留所有生成的负样本。|`1.0`|
+|`batch_transforms`|`paddlers.transforms.BatchCompose`|对输入数据应用的批数据变换算子。||

 `VOCDetDataset`对file list的要求如下:
 
 

+ 3 - 1
docs/apis/data_en.md

@@ -57,13 +57,14 @@ The initialization parameter list is as follows:
 |-------|----|--------|-----|
 |`data_dir`|`str`|Directory that stores the dataset.||
 |`image_dir`|`str`|Directory of input images.||
-|`ann_path`|`str`|[COCO Format](https://cocodataset.org/#home)label file path.||
+|`anno_path`|`str`|[COCO Format](https://cocodataset.org/#home)label file path.||
 |`transforms`|`paddlers.transforms.Compose`|Data transformation operators applied to input data.||
 |`label_list`|`str` \| `None`|Label list path. Label list is a text file, in which each line contains the name of class.|`None`|
 |`num_workers`|`int` \| `str`|Number of auxiliary processes used when loading data. If it is set to `'auto'`, use the following rules to determine the number of processes to use: When the number of CPU cores is greater than 16, 8 data read auxiliary processes are used; otherwise, the number of auxiliary processes is set to half the counts of CPU cores.|`'auto'`|
 |`shuffle`|`bool`|Whether to randomly shuffle the samples in the dataset.|`False`|
 |`allow_empty`|`bool`|Whether to add negative samples to the dataset.|`False`|
 |`empty_ratio`|`float`|Negative sample ratio. Take effect only if `allow_empty` is `True`. If `empty_ratio` is negative or greater than or equal to 1, all negative samples generated are retained.|`1.0`|
+|`batch_transforms`|`paddlers.transforms.BatchCompose`|Data batch transformation operators applied to input data.||

 ### VOC Format Object Detection Dataset `VOCDetDataset`

@@ -81,6 +82,7 @@ The initialization parameter list is as follows:
 |`shuffle`|`bool`|Whether to randomly shuffle the samples in the dataset.|`False`|
 |`allow_empty`|`bool`|Whether to add negative samples to the dataset.|`False`|
 |`empty_ratio`|`float`|Negative sample ratio. Takes effect only if `allow_empty` is `True`. If `empty_ratio` is negative or greater than or equal to `1`, all negative samples generated will be retained.|`1.0`|
+|`batch_transforms`|`paddlers.transforms.BatchCompose`|Data batch transformation operators applied to input data.||

 The requirements of `VOCDetDataset` for the file list are as follows:
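To make the renamed `anno_path` argument and the new `batch_transforms` argument concrete, here is a minimal construction sketch; the paths are placeholders and nothing below comes verbatim from this commit:

```python
# Minimal sketch of building a COCO-format detection dataset with the updated API.
import paddlers.transforms as T
from paddlers.datasets import COCODetDataset

train_transforms = T.Compose([
    T.DecodeImg(),             # read the image file into an array
    T.RandomHorizontalFlip(),
    T.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = COCODetDataset(
    data_dir='dataset_root',             # placeholder dataset root
    image_dir='images',
    anno_path='annotations/train.json',  # note: the parameter is `anno_path`, not `ann_path`
    transforms=train_transforms,
    label_list=None,                     # optional since this change
    shuffle=True,
    # Per the updated docstring this accepts a paddlers.transforms.BatchCompose
    # instance or a plain list of batch operators; None keeps the old behavior.
    batch_transforms=None)
```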
 
 

+ 3 - 1
docs/apis/train_cn.md

@@ -166,6 +166,7 @@ def train(self,
           warmup_start_lr=0.0,
           lr_decay_epochs=(216, 243),
           lr_decay_gamma=0.1,
+          cosine_decay_num_epochs=1000,
           metric=None,
           use_ema=False,
           early_stop=False,
@@ -196,7 +197,8 @@ def train(self,
 |`warmup_start_lr`|`int`|默认优化器warm-up阶段使用的初始学习率。|`0`|
 |`lr_decay_epochs`|`list` \| `tuple`|默认优化器学习率衰减的milestones,以epoch计。即,在第几个epoch执行学习率的衰减。|`(216, 243)`|
 |`lr_decay_gamma`|`float`|学习率衰减系数,适用于默认优化器。|`0.1`|
-|`metric`|`str` \| `None`|评价指标,可以为`'VOC'`、`COCO`或`None`。若为`None`,则根据数据集格式自动确定使用的评价指标。|`None`|
+|`cosine_decay_num_epochs`|`int`|使用余弦退火学习率调度器时计算退火周期的参数。|`1000`|
+|`metric`|`str` \| `None`|评价指标,可以为`'VOC'`、`'COCO'`、`'RBOX'`或`None`。若为`None`,则根据数据集格式自动确定使用的评价指标。|`None`|
 |`use_ema`|`bool`|是否启用[指数滑动平均策略](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/models/ppdet/optimizer.py)更新模型权重参数。|`False`|
 |`early_stop`|`bool`|训练过程是否启用早停策略。|`False`|
 |`early_stop_patience`|`int`|启用早停策略时的`patience`参数(参见[`EarlyStop`](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/utils/utils.py))。|`5`|

+ 3 - 1
docs/apis/train_en.md

@@ -166,6 +166,7 @@ def train(self,
           warmup_start_lr=0.0,
           lr_decay_epochs=(216, 243),
           lr_decay_gamma=0.1,
+          cosine_decay_num_epochs=1000,
           metric=None,
           use_ema=False,
           early_stop=False,
@@ -196,7 +197,8 @@ The meaning of each parameter is as follows:
 |`warmup_start_lr`|`int`|Default initial learning rate used in the warm-up phase of the optimizer.|`0`|
 |`lr_decay_epochs`|`list` \| `tuple`|Milestones of learning rate decline of the default optimizer, in terms of epochs. That is, which epoch the decay of the learning rate occurs.|`(216, 243)`|
 |`lr_decay_gamma`|`float`|Learning rate attenuation coefficient, for default optimizer.|`0.1`|
-|`metric`|`str` \| `None`|Evaluation metrics, which can be `'VOC'`, `COCO`, or `None`. If `None`, the evaluation metrics will be automatically determined according to the format of the dataset.|`None`|
+|`cosine_decay_num_epochs`|`int`|Parameter to determine the annealing cycle when a cosine annealing learning rate scheduler is used.|`1000`|
+|`metric`|`str` \| `None`|Evaluation metrics, which can be `'VOC'`, `'COCO'`, `'RBOX'`, or `None`. If `None`, the evaluation metrics will be automatically determined according to the format of the dataset.|`None`|
 |`use_ema`|`bool`|Whether to enable [exponential moving average strategy](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/models/ppdet/optimizer.py) to update model weights.|`False`|
 |`early_stop`|`bool`|Whether to enable the early stopping policy during training.|`False`|
 |`early_stop_patience`|`int`|`patience` parameter when the early stopping policy is enabled. Please refer to [`EarlyStop`](https://github.com/PaddlePaddle/PaddleRS/blob/develop/paddlers/utils/utils.py) for more details.|`5`|
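A hedged usage sketch of the two additions (`cosine_decay_num_epochs` and the `'RBOX'` metric). The trainer class, its import path, and the dataset objects are assumptions, not part of this diff:

```python
# Sketch only: FCOSR is the rotated detector added to the model zoo in this PR;
# the import path and constructor arguments are assumed, not taken from the diff.
import paddlers as pdrs

model = pdrs.tasks.det.FCOSR(num_classes=15)

model.train(
    num_epochs=36,
    train_dataset=train_dataset,   # e.g. the COCODetDataset built in the earlier sketch
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    cosine_decay_num_epochs=36,    # annealing period of the cosine scheduler
    metric='RBOX',                 # rotated-box evaluation added by this change
    use_ema=True,
    save_dir='output/fcosr')
```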

+ 3 - 2
docs/intro/data_prep_cn.md

@@ -9,5 +9,6 @@
 | 变化检测 | LEVIR-CD | https://justchenhao.github.io/LEVIR/ | [prepare_levircd.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_levircd.py) |
 | 变化检测 | Season-varying | https://paperswithcode.com/dataset/cdd-dataset-season-varying | [prepare_svcd.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_svcd.py) |
 | 场景分类 | UC Merced | http://weegee.vision.ucmerced.edu/datasets/landuse.html | [prepare_ucmerced.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_ucmerced.py) |
-| 目标检测 | RSOD | https://github.com/RSIA-LIESMARS-WHU/RSOD-Dataset- | [prepare_rsod](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_rsod.py) |
-| 图像分割 | iSAID | https://captain-whu.github.io/iSAID/ | [prepare_isaid](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_isaid.py) |
+| 目标检测 | DOTA | https://captain-whu.github.io/DOTA/ | [prepare_dota.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_dota.py) |
+| 目标检测 | RSOD | https://github.com/RSIA-LIESMARS-WHU/RSOD-Dataset- | [prepare_rsod.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_rsod.py) |
+| 图像分割 | iSAID | https://captain-whu.github.io/iSAID/ | [prepare_isaid.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_isaid.py) |

+ 3 - 2
docs/intro/data_prep_en.md

@@ -9,5 +9,6 @@
 | Change Detection | LEVIR-CD | https://justchenhao.github.io/LEVIR/ | [prepare_levircd.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_levircd.py) |
 | Change Detection | Season-varying | https://paperswithcode.com/dataset/cdd-dataset-season-varying | [prepare_svcd.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_svcd.py) |
 | Scene Classification | UC Merced | http://weegee.vision.ucmerced.edu/datasets/landuse.html | [prepare_ucmerced.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_ucmerced.py) |
-| Object Detection | RSOD | https://github.com/RSIA-LIESMARS-WHU/RSOD-Dataset- | [prepare_rsod](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_rsod.py) |
-| Image Segmentation | iSAID | https://captain-whu.github.io/iSAID/ | [prepare_isaid](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_isaid.py) |
+| Object Detection | DOTA | https://captain-whu.github.io/DOTA/ | [prepare_dota.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_dota.py) |
+| Object Detection | RSOD | https://github.com/RSIA-LIESMARS-WHU/RSOD-Dataset- | [prepare_rsod.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_rsod.py) |
+| Image Segmentation | iSAID | https://captain-whu.github.io/iSAID/ | [prepare_isaid.py](https://github.com/PaddlePaddle/PaddleRS/blob/develop/tools/prepare_dataset/prepare_isaid.py) |

+ 1 - 0
docs/intro/model_cons_params_cn.md

@@ -449,6 +449,7 @@
 
 
 | 参数名 | 描述                            | 默认值 |
 | --- |-------------------------------| --- |
+| `rotate (bool)` | 表示是否执行旋转目标检测 | `False` |
 | `num_classes (int)` | 目标类别数量                        | `80` |
 | `backbone (str)` | 骨干网络名称                | `'MobileNetV1'` |
 | `anchors (list[list[int]])` | 预定义锚框的大小                       | `[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]]` |

+ 1 - 0
docs/intro/model_cons_params_en.md

@@ -443,6 +443,7 @@ The YOLOv3 implementation based on PaddlePaddle.
 
 
 | Parameter Name | Description                                                                                                                 | Default Value |
 | --- |-----------------------------------------------------------------------------------------------------------------------------| --- |
+| `rotate (bool)` | If True, the model performs rotated object detection | `False` |
 | `num_classes (int)` | Number of target classes                                                                                                    | `80` |
 | `backbone (str)` | Backbone network to use                                                                                      | `'MobileNetV1'` |
 | `anchors (list[list[int]])` | Sizes of predefined anchor boxes                                                                                                   | `[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45 ], [59, 119], [116, 90], [156, 198], [373, 326]]` |
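If the table above belongs to the PaddleRS YOLOv3 wrapper, as the hunk context suggests, the new flag would be passed at construction time; treat the import alias and arguments below as assumptions rather than part of this commit:

```python
# Hypothetical usage of the documented `rotate` flag.
import paddlers as pdrs

model = pdrs.tasks.det.YOLOv3(num_classes=15, rotate=True)  # switch to rotated-box detection
```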

+ 1 - 0
docs/intro/model_zoo_cn.md

@@ -33,6 +33,7 @@ PaddleRS目前已支持的全部模型如下(标注\*的为遥感专用模型
 | 图像复原 | NAFNet | 是 |
 | 图像复原 | SwinIR | 是 |
 | 目标检测 | Faster R-CNN | 否 |
+| 目标检测 | FCOSR | 否 |
 | 目标检测 | PP-YOLO | 否 |
 | 目标检测 | PP-YOLO Tiny | 否 |
 | 目标检测 | PP-YOLOv2 | 否 |

+ 1 - 0
docs/intro/model_zoo_en.md

@@ -33,6 +33,7 @@ All models currently supported by PaddleRS are listed below (those marked \* are
 | Image Restoration | SwinIR | Yes |
 | Image Restoration | NAFNet | Yes |
 | Object Detection | Faster R-CNN | No |
+| Object Detection | FCOSR | No |
 | Object Detection | PP-YOLO | No |
 | Object Detection | PP-YOLO Tiny | No |
 | Object Detection | PP-YOLOv2 | No |

+ 9 - 0
docs/quick_start_cn.md

@@ -53,6 +53,15 @@ Windows用户可以在[此站点](https://www.lfd.uci.edu/~gohlke/pythonlibs/#gd
 pip install GDAL‑3.3.3‑cp39‑cp39‑win_amd64.whl
 ```
 
 
+4. (可选)安装ext_op
+
+PaddleRS支持旋转目标检测,在使用之前需要安装`ext_op`外部自定义库,安装方式如下:
+```shell
+cd paddlers/models/ppdet/ext_op
+python setup.py install
+```
+
+
 除了采用上述安装步骤以外,PaddleRS也提供Docker安装方式。具体步骤如下:

 1. 从dockerhub拉取镜像:

+ 9 - 0
docs/quick_start_en.md

@@ -46,6 +46,15 @@ Windows users can download GDAL wheels from [this site](https://www.lfd.uci.edu/
 pip install GDAL‑3.3.3‑cp39‑cp39‑win_amd64.whl
 ```
 
 
+4. (Optional) Install ext_op
+
+PaddleRS supports rotated object detection, which requires the `ext_op` external custom operator library. You can install `ext_op` as follows:
+
+```shell
+cd paddlers/models/ppdet/ext_op
+python setup.py install
+```
+
 We also provide a docker image for installation:

 1. Pull from dockerhub:

+ 31 - 5
paddlers/datasets/base.py

@@ -18,7 +18,8 @@ from paddle.io import Dataset
 from paddle.fluid.dataloader.collate import default_collate_fn
 
 from paddlers.utils import get_num_workers
-from paddlers.transforms import construct_sample_from_dict, Compose
+import paddlers.utils.logging as logging
+from paddlers.transforms import construct_sample_from_dict, Compose, BatchCompose
 
 
 class BaseDataset(Dataset):
@@ -26,7 +27,13 @@ class BaseDataset(Dataset):
     _KEYS_TO_DISCARD = None
     _collate_trans_info = False
 
-    def __init__(self, data_dir, label_list, transforms, num_workers, shuffle):
+    def __init__(self,
+                 data_dir,
+                 label_list,
+                 transforms,
+                 num_workers,
+                 shuffle,
+                 batch_transforms=None):
         super(BaseDataset, self).__init__()
 
         self.data_dir = data_dir
@@ -37,6 +44,8 @@ class BaseDataset(Dataset):
 
         self.num_workers = get_num_workers(num_workers)
         self.shuffle = shuffle
+        self.batch_transforms = None
+        self.build_collate_fn(batch_transforms)
 
     def __getitem__(self, idx):
         sample = construct_sample_from_dict(self.file_list[idx])
@@ -59,8 +68,25 @@ class BaseDataset(Dataset):
             for key in self._KEYS_TO_DISCARD:
                 for s, _ in batch:
                     s.pop(key, None)
+
+        samples = [s[0] for s in batch]
+
+        if self.batch_transforms:
+            samples = self.batch_transforms(samples)
+
         if self._collate_trans_info:
-            return default_collate_fn(
-                [s[0] for s in batch]), [s[1] for s in batch]
+            return default_collate_fn(samples), [s[1] for s in batch]
         else:
-            return default_collate_fn([s[0] for s in batch])
+            return default_collate_fn(samples)
+
+    def build_collate_fn(self, batch_transforms, collate_fn_constructor=None):
+        if self.batch_transforms is not None and batch_transforms:
+            logging.warning(
+                "The initial `batch_transforms` will be overwritten.")
+        if batch_transforms is not None:
+            batch_transforms = copy.deepcopy(batch_transforms)
+            if isinstance(batch_transforms, list):
+                batch_transforms = BatchCompose(batch_transforms)
+            self.batch_transforms = batch_transforms
+        if collate_fn_constructor:
+            self.collate_fn = collate_fn_constructor(self)
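A self-contained sketch of what the reworked collate path does with batch-level transforms; the no-op transform below is hypothetical and only stands in for operators that would normally be wrapped by `BatchCompose`:

```python
# Mirrors the three steps of the new collate_fn above, outside of any Dataset class.
import numpy as np
from paddle.fluid.dataloader.collate import default_collate_fn

def identity_batch_transform(samples):
    # placeholder for batch-level operators (e.g. padding every image to one size)
    return samples

# Each element is (sample_dict, trans_info), as produced by __getitem__ above.
batch = [({'image': np.zeros((2, 2, 3), np.float32)}, []),
         ({'image': np.ones((2, 2, 3), np.float32)}, [])]

samples = [s[0] for s in batch]              # 1. keep only the sample dicts
samples = identity_batch_transform(samples)  # 2. apply batch transforms, if any
collated = default_collate_fn(samples)       # 3. stack into a batch
print(collated['image'].shape)               # (2, 2, 2, 3)
```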

+ 3 - 2
paddlers/datasets/cd_dataset.py

@@ -55,9 +55,10 @@ class CDDataset(BaseDataset):
                  num_workers='auto',
                  shuffle=False,
                  with_seg_labels=False,
-                 binarize_labels=False):
+                 binarize_labels=False,
+                 batch_transforms=None):
         super(CDDataset, self).__init__(data_dir, label_list, transforms,
-                                        num_workers, shuffle)
+                                        num_workers, shuffle, batch_transforms)
 
         DELIMETER = ' '
 
 

+ 4 - 2
paddlers/datasets/clas_dataset.py

@@ -42,9 +42,11 @@ class ClasDataset(BaseDataset):
                  transforms,
                  label_list=None,
                  num_workers='auto',
-                 shuffle=False):
+                 shuffle=False,
+                 batch_transforms=None):
         super(ClasDataset, self).__init__(data_dir, label_list, transforms,
-                                          num_workers, shuffle)
+                                          num_workers, shuffle,
+                                          batch_transforms)
         self.file_list = list()
         self.labels = list()
 
 

+ 49 - 29
paddlers/datasets/coco.py

@@ -17,7 +17,7 @@ import copy
 import os
 import os.path as osp
 import random
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 
 import numpy as np
 
 
@@ -34,7 +34,7 @@ class COCODetDataset(BaseDataset):
     Args:
         data_dir (str): Root directory of the dataset.
         image_dir (str): Directory that contains the images.
-        ann_path (str): Path to COCO annotations.
+        anno_path (str): Path to COCO annotations.
         transforms (paddlers.transforms.Compose|list): Data preprocessing and data augmentation operators to apply.
         label_list (str|None, optional): Path of the file that contains the category names. Defaults to None.
         num_workers (int|str, optional): Number of processes used for data loading. If `num_workers` is 'auto',
@@ -45,6 +45,7 @@ class COCODetDataset(BaseDataset):
         allow_empty (bool, optional): Whether to add negative samples. Defaults to False.
         empty_ratio (float, optional): Ratio of negative samples. If `empty_ratio` is smaller than 0 or not less 
             than 1, keep all generated negative samples. Defaults to 1.0.
+        batch_transforms (paddlers.transforms.BatchCompose|list): Batch transformation operators to apply.
     """
 
     def __init__(self,
@@ -52,11 +53,12 @@ class COCODetDataset(BaseDataset):
                  image_dir,
                  anno_path,
                  transforms,
-                 label_list,
+                 label_list=None,
                  num_workers='auto',
                  shuffle=False,
                  allow_empty=False,
-                 empty_ratio=1.):
+                 empty_ratio=1.,
+                 batch_transforms=None):
         # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
         # or matplotlib.backends is imported for the first time.
         import matplotlib
@@ -64,7 +66,8 @@ class COCODetDataset(BaseDataset):
         from pycocotools.coco import COCO
 
         super(COCODetDataset, self).__init__(data_dir, label_list, transforms,
-                                             num_workers, shuffle)
+                                             num_workers, shuffle,
+                                             batch_transforms)
 
         self.data_fields = None
         self.num_max_boxes = 50
@@ -83,33 +86,31 @@ class COCODetDataset(BaseDataset):
         self.file_list = list()
         neg_file_list = list()
         self.labels = list()
+        self.anno_path = anno_path
 
-        annotations = dict()
-        annotations['images'] = list()
-        annotations['categories'] = list()
-        annotations['annotations'] = list()
+        annotations = defaultdict(list)
 
         cname2cid = OrderedDict()
         label_id = 0
-        with open(label_list, 'r', encoding=get_encoding(label_list)) as f:
-            for line in f.readlines():
-                cname2cid[line.strip()] = label_id
-                label_id += 1
-                self.labels.append(line.strip())
-
-        for k, v in cname2cid.items():
-            annotations['categories'].append({
-                'supercategory': 'component',
-                'id': v + 1,
-                'name': k
-            })
+        if label_list:
+            with open(label_list, 'r', encoding=get_encoding(label_list)) as f:
+                for line in f.readlines():
+                    cname2cid[line.strip()] = label_id
+                    label_id += 1
+                    self.labels.append(line.strip())
+
+            for k, v in cname2cid.items():
+                annotations['categories'].append({
+                    'supercategory': 'component',
+                    'id': v + 1,
+                    'name': k
+                })
 
         anno_path = norm_path(os.path.join(self.data_dir, anno_path))
         image_dir = norm_path(os.path.join(self.data_dir, image_dir))
 
         assert anno_path.endswith('.json'), \
             'invalid coco annotation file: ' + anno_path
-        from pycocotools.coco import COCO
         coco = COCO(anno_path)
         img_ids = coco.getImgIds()
         img_ids.sort()
@@ -155,7 +156,8 @@ class COCODetDataset(BaseDataset):
             gt_classes = []
             gt_bboxs = []
             gt_scores = []
-            difficults = []
+            gt_poly = []
+            difficulties = []
 
             for inst in instances:
                 # Check gt bbox
@@ -182,12 +184,21 @@ class COCODetDataset(BaseDataset):
                         'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                             img_id, float(inst['area']), x1, y1, x2, y2))
 
+                if 'segmentation' in inst and inst['iscrowd']:
+                    gt_poly.append([0.0 for _ in range(8)])
+                elif 'segmentation' in inst and inst['segmentation']:
+                    if not np.array(
+                            inst['segmentation'],
+                            dtype=object).size > 0 and not self.allow_empty:
+                        continue
+                    else:
+                        gt_poly.append(inst['segmentation'])
+
                 is_crowds.append([inst['iscrowd']])
-                gt_classes.append([inst['category_id']])
+                gt_classes.append([catid2clsid[inst['category_id']]])
                 gt_bboxs.append(inst['clean_bbox'])
                 gt_scores.append([1.])
-                difficults.append([0])
-
+                difficulties.append(inst.get('difficult', 0.))
                 annotations['annotations'].append({
                     'iscrowd': inst['iscrowd'],
                     'image_id': int(inst['image_id']),
@@ -195,18 +206,21 @@ class COCODetDataset(BaseDataset):
                     'area': inst['area'],
                     'category_id': inst['category_id'],
                     'id': inst['id'],
-                    'difficult': 0
+                    'difficult': inst.get('difficult', 0.)
                 })
+                if gt_poly:
+                    annotations['annotations'][-1]['gt_poly'] = gt_poly[-1]
 
             label_info = {
                 'is_crowd': np.array(is_crowds),
                 'gt_class': np.array(gt_classes),
                 'gt_bbox': np.array(gt_bboxs).astype(np.float32),
                 'gt_score': np.array(gt_scores).astype(np.float32),
-                'difficult': np.array(difficults),
+                'difficult': np.array(difficulties),
+                'gt_poly': np.array(gt_poly),
             }
 
-            if label_info['gt_bbox'].size > 0:
+            if label_info['gt_bbox'].size > 0 or label_info['gt_poly'].size > 0:
                 self.file_list.append({ ** im_info, ** label_info})
                 annotations['images'].append({
                     'height': im_h,
@@ -259,6 +273,7 @@ class COCODetDataset(BaseDataset):
                 DecodeImg(to_rgb=False)(sample),
                 DecodeImg(to_rgb=False)(sample_mix)
             ])
+
         sample['trans_info'] = []
         sample, trans_info = self.transforms(sample)
         return sample, trans_info
@@ -266,6 +281,11 @@ class COCODetDataset(BaseDataset):
     def __len__(self):
         return self.num_samples
 
+    def get_anno_path(self):
+        if self.anno_path:
+            return norm_path(os.path.join(self.data_dir, self.anno_path))
+        return None
+
     def set_epoch(self, epoch_id):
         self._epoch = epoch_id
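For reference, a sketch of how a rotated-box record flows through the loader above; the numbers are made up, only the field names follow the code:

```python
# An input COCO-style annotation whose 8-value segmentation encodes the 4 corners
# of a rotated box (values are illustrative).
coco_annotation = {
    'image_id': 1,
    'category_id': 1,      # remapped to a 0-based class id via catid2clsid
    'bbox': [10.0, 20.0, 50.0, 30.0],  # x, y, w, h of the horizontal box
    'segmentation': [[10.0, 20.0, 60.0, 25.0, 55.0, 50.0, 5.0, 45.0]],
    'iscrowd': 0,
    'area': 1400.0,
    'id': 7,
}

# After parsing, the per-image label_info built above holds, for this instance:
#   gt_class:  [[0]]                                   # catid2clsid[1]
#   gt_bbox:   [[10., 20., 60., 50.]]                  # clean_bbox as x1, y1, x2, y2
#   gt_poly:   [[[10., 20., 60., 25., 55., 50., 5., 45.]]]
#   difficult: [0.0]                                   # inst.get('difficult', 0.)
# Crowd instances instead contribute a zero polygon of length 8 to gt_poly.
```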
 
 

+ 3 - 2
paddlers/datasets/res_dataset.py

@@ -45,9 +45,10 @@ class ResDataset(BaseDataset):
                  transforms,
                  num_workers='auto',
                  shuffle=False,
-                 sr_factor=None):
+                 sr_factor=None,
+                 batch_transforms=None):
         super(ResDataset, self).__init__(data_dir, None, transforms,
-                                         num_workers, shuffle)
+                                         num_workers, shuffle, batch_transforms)
         self.file_list = list()
 
         with open(file_list, encoding=get_encoding(file_list)) as f:

+ 3 - 2
paddlers/datasets/seg_dataset.py

@@ -43,9 +43,10 @@ class SegDataset(BaseDataset):
                  transforms,
                  label_list=None,
                  num_workers='auto',
-                 shuffle=False):
+                 shuffle=False,
+                 batch_transforms=None):
         super(SegDataset, self).__init__(data_dir, label_list, transforms,
-                                         num_workers, shuffle)
+                                         num_workers, shuffle, batch_transforms)
         self.file_list = list()
         self.labels = list()
 
 

+ 5 - 2
paddlers/datasets/voc.py

@@ -46,6 +46,7 @@ class VOCDetDataset(BaseDataset):
         allow_empty (bool, optional): Whether to add negative samples. Defaults to False.
         empty_ratio (float, optional): Ratio of negative samples. If `empty_ratio` is smaller than 0 or not less 
             than 1, keep all generated negative samples. Defaults to 1.0.
+        batch_transforms (paddlers.transforms.BatchCompose|list): Batch transformation operators to apply.
     """
 
     def __init__(self,
@@ -56,14 +57,16 @@ class VOCDetDataset(BaseDataset):
                  num_workers='auto',
                  shuffle=False,
                  allow_empty=False,
-                 empty_ratio=1.):
+                 empty_ratio=1.,
+                 batch_transforms=None):
         # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
         # or matplotlib.backends is imported for the first time.
         import matplotlib
         matplotlib.use('Agg')
         from pycocotools.coco import COCO
         super(VOCDetDataset, self).__init__(data_dir, label_list, transforms,
-                                            num_workers, shuffle)
+                                            num_workers, shuffle,
+                                            batch_transforms)
 
         self.data_fields = None
         self.num_max_boxes = 50

+ 9 - 0
paddlers/models/ppdet/core/workspace.py

@@ -67,6 +67,15 @@ class AttrDict(dict):
             return self[key]
         raise AttributeError("object has no attribute '{}'".format(key))
 
+    def __setattr__(self, key, value):
+        self[key] = value
+
+    def copy(self):
+        new_dict = AttrDict()
+        for k, v in self.items():
+            new_dict.update({k: v})
+        return new_dict
+
 
 global_config = AttrDict()
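A quick illustration of the two `AttrDict` additions, attribute-style assignment and a shallow `copy()`:

```python
from paddlers.models.ppdet.core.workspace import AttrDict

cfg = AttrDict()
cfg.norm_type = 'bn'           # __setattr__ now writes straight into the dict
assert cfg['norm_type'] == 'bn'

cfg2 = cfg.copy()              # returns a new AttrDict with the same (shallow) items
cfg2.norm_type = 'sync_bn'
assert cfg.norm_type == 'bn'   # top-level keys stay independent after copy()
```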
 
 

+ 1 - 1
paddlers/models/ppdet/data/crop_utils/__init__.py

@@ -10,4 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
+# limitations under the License. 

+ 91 - 53
paddlers/models/ppdet/data/crop_utils/annotation_cropper.py

@@ -27,14 +27,15 @@ from .chip_box_utils import intersection_over_box
 
 
 
 
 class AnnoCropper(object):
-    def __init__(self, image_target_sizes: List[int],
+    def __init__(self,
+                 image_target_sizes: List[int],
                  valid_box_ratio_ranges: List[List[float]],
-                 chip_target_size: int, chip_target_stride: int,
-                 use_neg_chip: bool = False,
-                 max_neg_num_per_im: int = 8,
-                 max_per_img: int = -1,
-                 nms_thresh: int = 0.5
-                 ):
+                 chip_target_size: int,
+                 chip_target_stride: int,
+                 use_neg_chip: bool=False,
+                 max_neg_num_per_im: int=8,
+                 max_per_img: int=-1,
+                 nms_thresh: int=0.5):
         """
         Generate chips by chip_target_size and chip_target_stride.
         These two parameters just like kernel_size and stride in cnn.
@@ -117,7 +118,8 @@ class AnnoCropper(object):
         self.chip_records = []
         self._global_chip_id = 1
         for r in records:
-            self._cur_im_pos_chips = []  # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_ids is List[int]
+            self._cur_im_pos_chips = [
+            ]  # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_ids is List[int]
             self._cur_im_neg_chips = []  # element: (chip, neg_box_num)
             for scale_i in range(self.scale_num):
                 self._get_current_scale_parameters(scale_i, r)
@@ -126,12 +128,16 @@ class AnnoCropper(object):
                 chips = self._create_chips(r['h'], r['w'], self._cur_scale)
 
                 # # dict: chipid->[box_id, ...]
-                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(r['gt_bbox'], chips)
+                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
+                    r['gt_bbox'], chips)
 
                 # dict: chipid->neg_box_num
-                neg_chip2box_num = self._get_neg_boxes_and_chips(chips, list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
+                neg_chip2box_num = self._get_neg_boxes_and_chips(
+                    chips,
+                    list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
 
-                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx, neg_chip2box_num)
+                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
+                                          neg_chip2box_num)
 
             cur_image_records = self._trans_all_chips2annotations(r)
             self.chip_records.extend(cur_image_records)
@@ -147,7 +153,7 @@ class AnnoCropper(object):
 
 
         for neg_chipid, neg_box_num in neg_chip2box_num.items():
             chip = np.array(chips[neg_chipid])
-            self._cur_im_neg_chips.append((chip,  neg_box_num))
+            self._cur_im_neg_chips.append((chip, neg_box_num))
 
     def _trans_all_chips2annotations(self, r):
         gt_bbox = r['gt_bbox']
@@ -156,20 +162,24 @@ class AnnoCropper(object):
         gt_class = r['gt_class']
         # gt_poly = r['gt_poly']   # [None]xN
         # remaining keys: im_id, h, w
-        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox, is_crowd, gt_class)
+        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
+                                                         is_crowd, gt_class)
 
         if not self.use_neg_chip:
             return chip_records
 
         sampled_neg_chips = self._sample_neg_chips()
-        neg_chip_records = self._trans_neg_chips2annotations(im_file, sampled_neg_chips)
+        neg_chip_records = self._trans_neg_chips2annotations(im_file,
+                                                             sampled_neg_chips)
         chip_records.extend(neg_chip_records)
         return chip_records
 
-    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd, gt_class):
+    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
+                                     gt_class):
         chip_records = []
         for chip, boxes_idx in self._cur_im_pos_chips:
-            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx, chip)
+            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
+                                                            chip)
             x1, y1, x2, y2 = chip
             chip_h = y2 - y1
             chip_w = x2 - x1
@@ -197,12 +207,15 @@ class AnnoCropper(object):
             return self._cur_im_neg_chips
 
         candidate_num = int(sample_num * 1.5)
-        candidate_neg_chips = sorted(self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
+        candidate_neg_chips = sorted(
+            self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
         random.shuffle(candidate_neg_chips)
         sampled_neg_chips = candidate_neg_chips[:sample_num]
         return sampled_neg_chips
 
-    def _trans_neg_chips2annotations(self, im_file: str, sampled_neg_chips: List[Tuple]):
+    def _trans_neg_chips2annotations(self,
+                                     im_file: str,
+                                     sampled_neg_chips: List[Tuple]):
         chip_records = []
         for chip, neg_box_num in sampled_neg_chips:
             x1, y1, x2, y2 = chip
@@ -213,9 +226,12 @@ class AnnoCropper(object):
                 'im_id': np.array([self._global_chip_id]),
                 'h': chip_h,
                 'w': chip_w,
-                'gt_bbox': np.zeros((0, 4), dtype=np.float32),
-                'is_crowd': np.zeros((0, 1), dtype=np.int32),
-                'gt_class': np.zeros((0, 1), dtype=np.int32),
+                'gt_bbox': np.zeros(
+                    (0, 4), dtype=np.float32),
+                'is_crowd': np.zeros(
+                    (0, 1), dtype=np.int32),
+                'gt_class': np.zeros(
+                    (0, 1), dtype=np.int32),
                 # 'gt_poly': [],
                 'chip': chip
             }
@@ -247,7 +263,8 @@ class AnnoCropper(object):
 
 
         assert chip_size >= stride
         chip_overlap = chip_size - stride
-        if (width - chip_overlap) % stride > min_chip_location_diff:  # 不能被stride整除的部分比较大,则保留
+        if (width - chip_overlap
+            ) % stride > min_chip_location_diff:  # 不能被stride整除的部分比较大,则保留
             w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
         else:  # 不能被stride整除的部分比较小,则丢弃
             w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
@@ -267,9 +284,10 @@ class AnnoCropper(object):
 
 
         # check  chip size
         for item in chips:
-            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[1] > chip_size * 1.1:
+            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
+                    1] > chip_size * 1.1:
                 raise ValueError(item)
-        chips = np.array(chips, dtype=np.float)
+        chips = np.array(chips, dtype=np.float32)
 
         raw_size_chips = chips / scale
         return raw_size_chips
@@ -279,12 +297,15 @@ class AnnoCropper(object):
         im_size = self._cur_im_size
         scale = self._cur_scale
         #   Nx4            N
-        valid_boxes, valid_boxes_idx = self._validate_boxes(valid_ratio_range, im_size, gt_bbox, scale)
+        valid_boxes, valid_boxes_idx = self._validate_boxes(
+            valid_ratio_range, im_size, gt_bbox, scale)
         # dict: chipid->[box_id, ...]
-        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes, valid_boxes_idx)
+        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
+                                                  valid_boxes_idx)
         return pos_chip2boxes_idx
 
-    def _validate_boxes(self, valid_ratio_range: List[float],
+    def _validate_boxes(self,
+                        valid_ratio_range: List[float],
                         im_size: int,
                         gt_boxes: 'np.array of Nx4',
                         scale: float):
@@ -299,20 +320,26 @@ class AnnoCropper(object):
         target_mins = mins * scale
 
         low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
-        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(np.float).max
+        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
+            np.float32).max
 
-        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) & (target_mins >= 2))[0]
+        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) & (
+            target_mins >= 2))[0]
         valid_boxes = gt_boxes[valid_boxes_idx]
         return valid_boxes, valid_boxes_idx
 
-    def _find_pos_chips(self, chips: 'Cx4', valid_boxes: 'Bx4', valid_boxes_idx: 'B'):
+    def _find_pos_chips(self,
+                        chips: 'Cx4',
+                        valid_boxes: 'Bx4',
+                        valid_boxes_idx: 'B'):
         """
         :return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
         """
         iob = intersection_over_box(chips, valid_boxes)  # overlap, CxB
 
         iob_threshold_to_find_chips = 1.
-        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)
+        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
+            iob, iob_threshold_to_find_chips)
         pos_chip_ids = set(pos_chip_ids)
 
         iob_threshold_to_assign_box = 0.5
@@ -323,7 +350,8 @@ class AnnoCropper(object):
     def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
         return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)
 
-    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids, valid_boxes_idx):
+    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
+                                   valid_boxes_idx):
         chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
         pos_chip2boxes_idx = defaultdict(list)
         for chip_id, box_id in zip(chip_ids, box_ids):
@@ -333,7 +361,10 @@ class AnnoCropper(object):
             pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
         return pos_chip2boxes_idx
 
-    def _get_neg_boxes_and_chips(self, chips: 'Cx4', pos_chip_ids: 'D', proposals: 'Px4'):
+    def _get_neg_boxes_and_chips(self,
+                                 chips: 'Cx4',
+                                 pos_chip_ids: 'D',
+                                 proposals: 'Px4'):
         """
         :param chips:
         :param pos_chip_ids:
@@ -351,12 +382,16 @@ class AnnoCropper(object):
         im_size = self._cur_im_size
         scale = self._cur_scale
 
-        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size, proposals, scale)
+        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
+                                              proposals, scale)
         neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
         neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
         return neg_chip2box_num
 
-    def _find_neg_boxes(self, chips: 'Cx4', pos_chip_ids: 'D', valid_props: 'Px4'):
+    def _find_neg_boxes(self,
+                        chips: 'Cx4',
+                        pos_chip_ids: 'D',
+                        valid_props: 'Px4'):
         """
         :return: neg_boxes: Nx4
         """
@@ -370,7 +405,8 @@ class AnnoCropper(object):
         neg_boxes = valid_props[non_overlap_props_idx]
         return neg_boxes
 
-    def _find_neg_chips(self, chips: 'Cx4', pos_chip_ids: 'D', neg_boxes: 'Nx4'):
+    def _find_neg_chips(self, chips: 'Cx4', pos_chip_ids: 'D',
+                        neg_boxes: 'Nx4'):
         """
         :return: neg_chip2box_num, dict: chipid->neg_box_num
         """
@@ -469,31 +505,37 @@ class AnnoCropper(object):
         for result in results:
             bbox_locs = result['bbox']
             bbox_nums = result['bbox_num']
-            if len(bbox_locs) == 1 and bbox_locs[0][0] == -1:  # current batch has no detections
+            if len(bbox_locs) == 1 and bbox_locs[0][
+                    0] == -1:  # current batch has no detections
                 # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
                 # MultiClassNMS output: if there are no detected boxes for any image, lod is set to {1} and Out contains only one value, which is -1.
                 continue
-            im_ids = result['im_id'] # replace with range(len(bbox_nums))
+            im_ids = result['im_id']  # replace with range(len(bbox_nums))
 
             last_bbox_num = 0
             for idx, im_id in enumerate(im_ids):
 
                 cur_bbox_len = bbox_nums[idx]
-                bboxes = bbox_locs[last_bbox_num: last_bbox_num + cur_bbox_len]
+                bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
                 last_bbox_num += cur_bbox_len
                 # box: [num_id, score, xmin, ymin, xmax, ymax]
                 if len(bboxes) == 0:  # current image has no detections
                     continue
 
-                chip_rec = records[int(im_id) - 1]  # im_id starts from 1, type is np.int64
+                chip_rec = records[int(im_id) -
+                                   1]  # im_id starts from 1, type is np.int64
                 image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])
 
-                bboxes = transform_chip_boxes2image_boxes(bboxes, chip_rec["chip"], chip_rec["ori_im_h"], chip_rec["ori_im_w"])
+                bboxes = transform_chip_boxes2image_boxes(
+                    bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
+                    chip_rec["ori_im_w"])
 
                 scale_i = chip_rec["scale_i"]
-                cur_scale = self._get_current_scale(self.target_sizes[scale_i], image_size)
-                _, valid_boxes_idx = self._validate_boxes(self.valid_box_ratio_ranges[scale_i], image_size,
-                                                                    bboxes[:, 2:], cur_scale)
+                cur_scale = self._get_current_scale(self.target_sizes[scale_i],
+                                                    image_size)
+                _, valid_boxes_idx = self._validate_boxes(
+                    self.valid_box_ratio_ranges[scale_i], image_size,
+                    bboxes[:, 2:], cur_scale)
                 ori_img_id = self._global_chip_id2img_id[int(im_id)]
 
                 img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])
@@ -507,7 +549,8 @@ class AnnoCropper(object):
         nms_thresh = self.nms_thresh
 
         for img_id in img_id2bbox:
-            box = img_id2bbox[img_id]  # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
+            box = img_id2bbox[
+                img_id]  # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
             box = np.concatenate(box, axis=0)
             nms_dets = nms(box, nms_thresh)
             if max_per_img > 0:
@@ -525,18 +568,13 @@ class AnnoCropper(object):
         results = []
         for img_id in im_ids:  # output by original im_id order
             if len(img_id2bbox[img_id]) == 0:
-                bbox = np.array([[-1.,  0.,  0.,  0.,  0.,  0.]])  # edge case: no detections
+                bbox = np.array(
+                    [[-1., 0., 0., 0., 0., 0.]])  # edge case: no detections
                 bbox_num = np.array([0])
             else:
                 # np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
                 bbox = img_id2bbox[img_id]
                 bbox_num = np.array([len(bbox)])
-            res = dict(
-                im_id=np.array([[img_id]]),
-                bbox=bbox,
-                bbox_num=bbox_num
-            )
+            res = dict(im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
             results.append(res)
         return results
-
-
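Editor's note: the AnnoCropper hunks above are line reflows plus the np.float to np.float32 migration; the logic is unchanged. As a reading aid for the chip-grid hunk, here is a minimal, self-contained sketch of the step computation. The function name and the min_chip_location_diff value are illustrative, not library code.

    import math

    def chip_steps(width, chip_size, stride, min_chip_location_diff=20):
        # Mirrors the branch shown above: keep the leftover that is not
        # divisible by the stride when it is large, otherwise drop it.
        assert chip_size >= stride
        chip_overlap = chip_size - stride
        remainder = (width - chip_overlap) % stride
        if remainder > min_chip_location_diff:
            return max(1, int(math.ceil((width - chip_overlap) / stride)))
        return max(1, int(math.floor((width - chip_overlap) / stride)))

    # 1000 = 64 (overlap) + 2 x 448 + 40 leftover, so the leftover is kept.
    print(chip_steps(width=1000, chip_size=512, stride=448))  # 3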

+ 10 - 6
paddlers/models/ppdet/data/crop_utils/chip_box_utils.py

@@ -33,8 +33,10 @@ def intersection_over_box(chips, boxes):
 
     box_area = bbox_area(boxes)  # B
 
-    inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:], boxes[:, 2:])  # CxBX2
-    inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2], boxes[:, :2])  # CxBx2
+    inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:],
+                            boxes[:, 2:])  # CxBX2
+    inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2],
+                            boxes[:, :2])  # CxBx2
     inter_wh = inter_x2y2 - inter_x1y1
     inter_wh = np.clip(inter_wh, a_min=0, a_max=None)
     inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1]  # CxB
@@ -81,8 +83,9 @@ def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
 def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
     chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
     chip_id2overlap_box_num = np.bincount(chip_ids)  # 1d array
-    chip_id2overlap_box_num = np.pad(chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
-                                     constant_values=0)
+    chip_id2overlap_box_num = np.pad(
+        chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
+        constant_values=0)
 
     chosen_chip_ids = []
     while len(box_ids) > 0:
@@ -92,7 +95,8 @@ def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
         chosen_chip_ids.append(max_count_chip_id)
 
         box_ids_in_cur_chip = box_ids[chip_ids == max_count_chip_id]
-        ids_not_in_cur_boxes_mask = np.logical_not(np.isin(box_ids, box_ids_in_cur_chip))
+        ids_not_in_cur_boxes_mask = np.logical_not(
+            np.isin(box_ids, box_ids_in_cur_chip))
         chip_ids = chip_ids[ids_not_in_cur_boxes_mask]
         box_ids = box_ids[ids_not_in_cur_boxes_mask]
     return chosen_chip_ids, chip_id2overlap_box_num
@@ -124,7 +128,7 @@ def nms(dets, thresh):
     order = scores.argsort()[::-1]
 
     ndets = dets.shape[0]
-    suppressed = np.zeros((ndets), dtype=np.int)
+    suppressed = np.zeros((ndets), dtype=np.int32)
 
     # nominal indices
     # _i, _j
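Editor's note: a hedged usage sketch of the nms helper touched above. It assumes the [label, score, x1, y1, x2, y2] row layout described by the aggregation comments in annotation_cropper.py and the module path implied by this repository's layout; the numbers are made up.

    import numpy as np
    from paddlers.models.ppdet.data.crop_utils.chip_box_utils import nms

    # Three detections; the first two overlap heavily, the third stands alone.
    dets = np.array([
        [0., 0.90, 10., 10., 110., 110.],
        [0., 0.75, 12., 14., 108., 112.],
        [1., 0.60, 200., 200., 260., 260.],
    ], dtype=np.float32)

    kept = nms(dets, 0.5)
    print(kept.shape[0])  # expected: 2, the lower-scoring duplicate is dropped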

+ 309 - 0
paddlers/models/ppdet/data/reader.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import os
 import traceback
 import six
@@ -21,6 +22,10 @@ if sys.version_info >= (3, 0):
 else:
     pass
 import numpy as np
+import paddle
+import paddle.nn.functional as F
+
+from copy import deepcopy
 
 from paddle.io import DataLoader, DistributedBatchSampler
 from .utils import default_collate_fn
@@ -300,3 +305,307 @@ class TestMOTReader(BaseDataLoader):
         super(TestMOTReader, self).__init__(sample_transforms, batch_transforms,
                                             batch_size, shuffle, drop_last,
                                             num_classes, **kwargs)
+
+
+# For Semi-Supervised Object Detection (SSOD)
+class Compose_SSOD(object):
+    def __init__(self, base_transforms, weak_aug, strong_aug, num_classes=80):
+        self.base_transforms = base_transforms
+        self.base_transforms_cls = []
+        for t in self.base_transforms:
+            for k, v in t.items():
+                op_cls = getattr(transform, k)
+                f = op_cls(**v)
+                if hasattr(f, 'num_classes'):
+                    f.num_classes = num_classes
+                self.base_transforms_cls.append(f)
+
+        self.weak_augs = weak_aug
+        self.weak_augs_cls = []
+        for t in self.weak_augs:
+            for k, v in t.items():
+                op_cls = getattr(transform, k)
+                f = op_cls(**v)
+                if hasattr(f, 'num_classes'):
+                    f.num_classes = num_classes
+                self.weak_augs_cls.append(f)
+
+        self.strong_augs = strong_aug
+        self.strong_augs_cls = []
+        for t in self.strong_augs:
+            for k, v in t.items():
+                op_cls = getattr(transform, k)
+                f = op_cls(**v)
+                if hasattr(f, 'num_classes'):
+                    f.num_classes = num_classes
+                self.strong_augs_cls.append(f)
+
+    def __call__(self, data):
+        for f in self.base_transforms_cls:
+            try:
+                data = f(data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map sample transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   f, e, str(stack_info)))
+                raise e
+
+        weak_data = deepcopy(data)
+        strong_data = deepcopy(data)
+        for f in self.weak_augs_cls:
+            try:
+                weak_data = f(weak_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map weak aug [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   f, e, str(stack_info)))
+                raise e
+
+        for f in self.strong_augs_cls:
+            try:
+                strong_data = f(strong_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map strong aug [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   f, e, str(stack_info)))
+                raise e
+
+        weak_data['strong_aug'] = strong_data
+        return weak_data
+
+
+class BatchCompose_SSOD(Compose):
+    def __init__(self, transforms, num_classes=80, collate_batch=True):
+        super(BatchCompose_SSOD, self).__init__(transforms, num_classes)
+        self.collate_batch = collate_batch
+
+    def __call__(self, data):
+        # split strong_data from data(weak_data)
+        strong_data = []
+        for sample in data:
+            strong_data.append(sample['strong_aug'])
+            sample.pop('strong_aug')
+
+        for f in self.transforms_cls:
+            try:
+                data = f(data)
+                strong_data = f(strong_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map batch transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   f, e, str(stack_info)))
+                raise e
+
+        # remove keys which are not needed by the model
+        extra_key = ['h', 'w', 'flipped']
+        for k in extra_key:
+            for sample in data:
+                if k in sample:
+                    sample.pop(k)
+            for sample in strong_data:
+                if k in sample:
+                    sample.pop(k)
+
+        # batch data, if user-define batch function needed
+        # use user-defined here
+        if self.collate_batch:
+            batch_data = default_collate_fn(data)
+            strong_batch_data = default_collate_fn(strong_data)
+            return batch_data, strong_batch_data
+        else:
+            batch_data = {}
+            for k in data[0].keys():
+                tmp_data = []
+                for i in range(len(data)):
+                    tmp_data.append(data[i][k])
+                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
+                    tmp_data = np.stack(tmp_data, axis=0)
+                batch_data[k] = tmp_data
+
+            strong_batch_data = {}
+            for k in strong_data[0].keys():
+                tmp_data = []
+                for i in range(len(strong_data)):
+                    tmp_data.append(strong_data[i][k])
+                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
+                    tmp_data = np.stack(tmp_data, axis=0)
+                strong_batch_data[k] = tmp_data
+
+        return batch_data, strong_batch_data
+
+
+class CombineSSODLoader(object):
+    def __init__(self, label_loader, unlabel_loader):
+        self.label_loader = label_loader
+        self.unlabel_loader = unlabel_loader
+
+    def __iter__(self):
+        while True:
+            try:
+                label_samples = next(self.label_loader_iter)
+            except:
+                self.label_loader_iter = iter(self.label_loader)
+                label_samples = next(self.label_loader_iter)
+
+            try:
+                unlabel_samples = next(self.unlabel_loader_iter)
+            except:
+                self.unlabel_loader_iter = iter(self.unlabel_loader)
+                unlabel_samples = next(self.unlabel_loader_iter)
+
+            yield (
+                label_samples[0],  # sup weak
+                label_samples[1],  # sup strong
+                unlabel_samples[0],  # unsup weak
+                unlabel_samples[1]  # unsup strong
+            )
+
+    def __call__(self):
+        return self.__iter__()
+
+
+class BaseSemiDataLoader(object):
+    def __init__(self,
+                 sample_transforms=[],
+                 weak_aug=[],
+                 strong_aug=[],
+                 sup_batch_transforms=[],
+                 unsup_batch_transforms=[],
+                 sup_batch_size=1,
+                 unsup_batch_size=1,
+                 shuffle=True,
+                 drop_last=True,
+                 num_classes=80,
+                 collate_batch=True,
+                 use_shared_memory=False,
+                 **kwargs):
+        # sup transforms
+        self._sample_transforms_label = Compose_SSOD(
+            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
+        self._batch_transforms_label = BatchCompose_SSOD(
+            sup_batch_transforms, num_classes, collate_batch)
+        self.batch_size_label = sup_batch_size
+
+        # unsup transforms
+        self._sample_transforms_unlabel = Compose_SSOD(
+            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
+        self._batch_transforms_unlabel = BatchCompose_SSOD(
+            unsup_batch_transforms, num_classes, collate_batch)
+        self.batch_size_unlabel = unsup_batch_size
+
+        # common
+        self.shuffle = shuffle
+        self.drop_last = drop_last
+        self.use_shared_memory = use_shared_memory
+        self.kwargs = kwargs
+
+    def __call__(self,
+                 dataset_label,
+                 dataset_unlabel,
+                 worker_num,
+                 batch_sampler_label=None,
+                 batch_sampler_unlabel=None,
+                 return_list=False):
+        # sup dataset 
+        self.dataset_label = dataset_label
+        self.dataset_label.check_or_download_dataset()
+        self.dataset_label.parse_dataset()
+        self.dataset_label.set_transform(self._sample_transforms_label)
+        self.dataset_label.set_kwargs(**self.kwargs)
+        if batch_sampler_label is None:
+            self._batch_sampler_label = DistributedBatchSampler(
+                self.dataset_label,
+                batch_size=self.batch_size_label,
+                shuffle=self.shuffle,
+                drop_last=self.drop_last)
+        else:
+            self._batch_sampler_label = batch_sampler_label
+
+        # unsup dataset
+        self.dataset_unlabel = dataset_unlabel
+        self.dataset_unlabel.length = self.dataset_label.__len__()
+        self.dataset_unlabel.check_or_download_dataset()
+        self.dataset_unlabel.parse_dataset()
+        self.dataset_unlabel.set_transform(self._sample_transforms_unlabel)
+        self.dataset_unlabel.set_kwargs(**self.kwargs)
+        if batch_sampler_unlabel is None:
+            self._batch_sampler_unlabel = DistributedBatchSampler(
+                self.dataset_unlabel,
+                batch_size=self.batch_size_unlabel,
+                shuffle=self.shuffle,
+                drop_last=self.drop_last)
+        else:
+            self._batch_sampler_unlabel = batch_sampler_unlabel
+
+        # DataLoader do not start sub-process in Windows and Mac
+        # system, do not need to use shared memory
+        use_shared_memory = self.use_shared_memory and \
+                            sys.platform not in ['win32', 'darwin']
+        # check whether shared memory size is bigger than 1G(1024M)
+        if use_shared_memory:
+            shm_size = _get_shared_memory_size_in_M()
+            if shm_size is not None and shm_size < 1024.:
+                logger.warning("Shared memory size is less than 1G, "
+                               "disable shared_memory in DataLoader")
+                use_shared_memory = False
+
+        self.dataloader_label = DataLoader(
+            dataset=self.dataset_label,
+            batch_sampler=self._batch_sampler_label,
+            collate_fn=self._batch_transforms_label,
+            num_workers=worker_num,
+            return_list=return_list,
+            use_shared_memory=use_shared_memory)
+
+        self.dataloader_unlabel = DataLoader(
+            dataset=self.dataset_unlabel,
+            batch_sampler=self._batch_sampler_unlabel,
+            collate_fn=self._batch_transforms_unlabel,
+            num_workers=worker_num,
+            return_list=return_list,
+            use_shared_memory=use_shared_memory)
+
+        self.dataloader = CombineSSODLoader(self.dataloader_label,
+                                            self.dataloader_unlabel)
+        self.loader = iter(self.dataloader)
+        return self
+
+    def __len__(self):
+        return len(self._batch_sampler_label)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self.loader)
+
+    def next(self):
+        # python2 compatibility
+        return self.__next__()
+
+
+@register
+class SemiTrainReader(BaseSemiDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 weak_aug=[],
+                 strong_aug=[],
+                 sup_batch_transforms=[],
+                 unsup_batch_transforms=[],
+                 sup_batch_size=1,
+                 unsup_batch_size=1,
+                 shuffle=True,
+                 drop_last=True,
+                 num_classes=80,
+                 collate_batch=True,
+                 **kwargs):
+        super(SemiTrainReader, self).__init__(
+            sample_transforms, weak_aug, strong_aug, sup_batch_transforms,
+            unsup_batch_transforms, sup_batch_size, unsup_batch_size, shuffle,
+            drop_last, num_classes, collate_batch, **kwargs)

+ 1 - 0
paddlers/models/ppdet/data/source/__init__.py

@@ -28,3 +28,4 @@ from .keypoint_coco import *
 from .mot import *
 from .sniper_coco import SniperCOCODataSet
 from .dataset import ImageFolder
+from .pose3d_cmb import *

+ 3 - 0
paddlers/models/ppdet/data/source/category.py

@@ -118,6 +118,9 @@ def get_categories(metric_type, anno_file=None, arch=None):
     ) == 'keypointtopdownmpiieval':
         return (None, {'id': 'keypoint'})
 
+    elif metric_type.lower() == 'pose3deval':
+        return (None, {'id': 'pose3d'})
+
     elif metric_type.lower() in ['mot', 'motdet', 'reid']:
         if anno_file and os.path.isfile(anno_file):
             cats = []
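Editor's note: a small illustration of the new Pose3DEval branch, assuming the module path of this repository; the metric name is matched case-insensitively.

    from paddlers.models.ppdet.data.source.category import get_categories

    clsid2catid, catid2name = get_categories('Pose3DEval')
    print(clsid2catid)  # None, 3D pose evaluation needs no class-id mapping
    print(catid2name)   # {'id': 'pose3d'}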

+ 237 - 3
paddlers/models/ppdet/data/source/coco.py

@@ -13,6 +13,11 @@
 # limitations under the License.
 
 import os
+import copy
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
 import numpy as np
 from paddlers.models.ppdet.core.workspace import register, serializable
 from .dataset import DetDataset
@@ -20,6 +25,8 @@ from .dataset import DetDataset
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 
+__all__ = ['COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet']
+
 
 @register
 @serializable
@@ -170,8 +177,10 @@ class COCODataSet(DetDataset):
                 gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                 is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                 gt_poly = [None] * num_bbox
+                gt_track_id = -np.ones((num_bbox, 1), dtype=np.int32)
 
                 has_segmentation = False
+                has_track_id = False
                 for i, box in enumerate(bboxes):
                     catid = box['category_id']
                     gt_class[i][0] = self.catid2clsid[catid]
@@ -181,8 +190,9 @@ class COCODataSet(DetDataset):
                     if 'segmentation' in box and box['iscrowd'] == 1:
                         gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
                     elif 'segmentation' in box and box['segmentation']:
-                        if not np.array(box['segmentation']
-                                        ).size > 0 and not self.allow_empty:
+                        if not np.array(
+                                box['segmentation'],
+                                dtype=object).size > 0 and not self.allow_empty:
                             bboxes.pop(i)
                             gt_poly.pop(i)
                             np.delete(is_crowd, i)
@@ -192,6 +202,10 @@ class COCODataSet(DetDataset):
                             gt_poly[i] = box['segmentation']
                         has_segmentation = True
 
+                    if 'track_id' in box:
+                        gt_track_id[i][0] = box['track_id']
+                        has_track_id = True
+
                 if has_segmentation and not any(
                         gt_poly) and not self.allow_empty:
                     continue
@@ -202,6 +216,8 @@ class COCODataSet(DetDataset):
                     'gt_bbox': gt_bbox,
                     'gt_poly': gt_poly,
                 }
+                if has_track_id:
+                    gt_rec.update({'gt_track_id': gt_track_id})
 
                 for k, v in gt_rec.items():
                     if k in self.data_fields:
@@ -223,7 +239,8 @@ class COCODataSet(DetDataset):
             if self.sample_num > 0 and ct >= self.sample_num:
                 break
         assert ct > 0, 'not found any coco record in %s' % (anno_path)
-        logger.debug('{} samples in file {}'.format(ct, anno_path))
+        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
+                    format(ct, len(img_ids) - ct, anno_path))
         if self.allow_empty and len(empty_records) > 0:
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
@@ -351,3 +368,220 @@ class SlicedCOCODataSet(COCODataSet):
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
         self.roidbs = records
+
+
+@register
+@serializable
+class SemiCOCODataSet(COCODataSet):
+    """Semi-COCODataSet used for supervised and unsupervised dataSet"""
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 data_fields=['image'],
+                 sample_num=-1,
+                 load_crowd=False,
+                 allow_empty=False,
+                 empty_ratio=1.,
+                 repeat=1,
+                 supervised=True):
+        super(SemiCOCODataSet, self).__init__(
+            dataset_dir, image_dir, anno_path, data_fields, sample_num,
+            load_crowd, allow_empty, empty_ratio, repeat)
+        self.supervised = supervised
+        self.length = -1  # default -1 means all
+
+    def parse_dataset(self):
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        assert anno_path.endswith('.json'), \
+            'invalid coco annotation file: ' + anno_path
+        from pycocotools.coco import COCO
+        coco = COCO(anno_path)
+        img_ids = coco.getImgIds()
+        img_ids.sort()
+        cat_ids = coco.getCatIds()
+        records = []
+        empty_records = []
+        ct = 0
+
+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+        self.cname2cid = dict({
+            coco.loadCats(catid)[0]['name']: clsid
+            for catid, clsid in self.catid2clsid.items()
+        })
+
+        if 'annotations' not in coco.dataset or self.supervised == False:
+            self.load_image_only = True
+            logger.warning('Annotation file: {} does not contain ground truth '
+                           'and load image information only.'.format(anno_path))
+
+        for img_id in img_ids:
+            img_anno = coco.loadImgs([img_id])[0]
+            im_fname = img_anno['file_name']
+            im_w = float(img_anno['width'])
+            im_h = float(img_anno['height'])
+
+            im_path = os.path.join(image_dir,
+                                   im_fname) if image_dir else im_fname
+            is_empty = False
+            if not os.path.exists(im_path):
+                logger.warning('Illegal image file: {}, and it will be '
+                               'ignored'.format(im_path))
+                continue
+
+            if im_w < 0 or im_h < 0:
+                logger.warning('Illegal width: {} or height: {} in annotation, '
+                               'and im_id: {} will be ignored'.format(
+                                   im_w, im_h, img_id))
+                continue
+
+            coco_rec = {
+                'im_file': im_path,
+                'im_id': np.array([img_id]),
+                'h': im_h,
+                'w': im_w,
+            } if 'image' in self.data_fields else {}
+
+            if not self.load_image_only:
+                ins_anno_ids = coco.getAnnIds(
+                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
+                instances = coco.loadAnns(ins_anno_ids)
+
+                bboxes = []
+                is_rbox_anno = False
+                for inst in instances:
+                    # check gt bbox
+                    if inst.get('ignore', False):
+                        continue
+                    if 'bbox' not in inst.keys():
+                        continue
+                    else:
+                        if not any(np.array(inst['bbox'])):
+                            continue
+
+                    x1, y1, box_w, box_h = inst['bbox']
+                    x2 = x1 + box_w
+                    y2 = y1 + box_h
+                    eps = 1e-5
+                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
+                        inst['clean_bbox'] = [
+                            round(float(x), 3) for x in [x1, y1, x2, y2]
+                        ]
+                        bboxes.append(inst)
+                    else:
+                        logger.warning(
+                            'Found an invalid bbox in annotations: im_id: {}, '
+                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
+                                img_id, float(inst['area']), x1, y1, x2, y2))
+
+                num_bbox = len(bboxes)
+                if num_bbox <= 0 and not self.allow_empty:
+                    continue
+                elif num_bbox <= 0:
+                    is_empty = True
+
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_poly = [None] * num_bbox
+
+                has_segmentation = False
+                for i, box in enumerate(bboxes):
+                    catid = box['category_id']
+                    gt_class[i][0] = self.catid2clsid[catid]
+                    gt_bbox[i, :] = box['clean_bbox']
+                    is_crowd[i][0] = box['iscrowd']
+                    # check RLE format 
+                    if 'segmentation' in box and box['iscrowd'] == 1:
+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
+                    elif 'segmentation' in box and box['segmentation']:
+                        if not np.array(box['segmentation']
+                                        ).size > 0 and not self.allow_empty:
+                            bboxes.pop(i)
+                            gt_poly.pop(i)
+                            np.delete(is_crowd, i)
+                            np.delete(gt_class, i)
+                            np.delete(gt_bbox, i)
+                        else:
+                            gt_poly[i] = box['segmentation']
+                        has_segmentation = True
+
+                if has_segmentation and not any(
+                        gt_poly) and not self.allow_empty:
+                    continue
+
+                gt_rec = {
+                    'is_crowd': is_crowd,
+                    'gt_class': gt_class,
+                    'gt_bbox': gt_bbox,
+                    'gt_poly': gt_poly,
+                }
+
+                for k, v in gt_rec.items():
+                    if k in self.data_fields:
+                        coco_rec[k] = v
+
+                # TODO: remove load_semantic
+                if self.load_semantic and 'semantic' in self.data_fields:
+                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
+                                            'train2017', im_fname[:-3] + 'png')
+                    coco_rec.update({'semantic': seg_path})
+
+            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
+                im_path, img_id, im_h, im_w))
+            if is_empty:
+                empty_records.append(coco_rec)
+            else:
+                records.append(coco_rec)
+            ct += 1
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
+        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
+                    format(ct, len(img_ids) - ct, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs = records
+
+        if self.supervised:
+            logger.info(f'Use {len(self.roidbs)} sup_samples data as LABELED')
+        else:
+            if self.length > 0:  # unsup length will be decided by sup length
+                all_roidbs = self.roidbs.copy()
+                selected_idxs = [
+                    np.random.choice(len(all_roidbs))
+                    for _ in range(self.length)
+                ]
+                self.roidbs = [all_roidbs[i] for i in selected_idxs]
+            logger.info(
+                f'Use {len(self.roidbs)} unsup_samples data as UNLABELED')
+
+    def __getitem__(self, idx):
+        n = len(self.roidbs)
+        if self.repeat > 1:
+            idx %= n
+        # data batch
+        roidb = copy.deepcopy(self.roidbs[idx])
+        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
+            idx = np.random.randint(n)
+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
+        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
+            idx = np.random.randint(n)
+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
+        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
+            roidb = [roidb, ] + [
+                copy.deepcopy(self.roidbs[np.random.randint(n)])
+                for _ in range(4)
+            ]
+        if isinstance(roidb, Sequence):
+            for r in roidb:
+                r['curr_iter'] = self._curr_iter
+        else:
+            roidb['curr_iter'] = self._curr_iter
+        self._curr_iter += 1
+
+        return self.transform(roidb)

+ 9 - 1
paddlers/models/ppdet/data/source/dataset.py

@@ -86,6 +86,12 @@ class DetDataset(Dataset):
                 copy.deepcopy(self.roidbs[np.random.randint(n)])
                 for _ in range(4)
             ]
+        elif self.pre_img_epoch == 0 or self._epoch < self.pre_img_epoch:
+            # Add previous image as input, only used in CenterTrack
+            idx_pre_img = idx - 1
+            if idx_pre_img < 0:
+                idx_pre_img = idx + 1
+            roidb = [roidb, ] + [copy.deepcopy(self.roidbs[idx_pre_img])]
         if isinstance(roidb, Sequence):
             for r in roidb:
                 r['curr_iter'] = self._curr_iter
@@ -103,6 +109,7 @@ class DetDataset(Dataset):
         self.mixup_epoch = kwargs.get('mixup_epoch', -1)
         self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
         self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
+        self.pre_img_epoch = kwargs.get('pre_img_epoch', -1)
 
     def set_transform(self, transform):
         self.transform = transform
@@ -254,7 +261,8 @@ class ImageFolder(DetDataset):
                 records.append(rec)
             ct_sub += sub_img_num
             ct += 1
-        print('{} samples and slice to {} sub_samples'.format(ct, ct_sub))
+        logger.info('{} samples and slice to {} sub_samples.'.format(ct,
+                                                                     ct_sub))
         self.roidbs = records
 
     def get_label_list(self):
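Editor's note: a tiny self-contained sketch of the previous-image rule added above (only taken when pre_img_epoch is active, i.e. for CenterTrack-style training); the helper name is illustrative.

    # Mirrors the index rule in the new elif branch: use idx - 1 when it
    # exists, otherwise fall back to idx + 1 for the first sample.
    def previous_image_index(idx):
        idx_pre_img = idx - 1
        if idx_pre_img < 0:
            idx_pre_img = idx + 1
        return idx_pre_img

    print([previous_image_index(i) for i in range(4)])  # [1, 0, 1, 2]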

+ 84 - 29
paddlers/models/ppdet/data/source/keypoint_coco.py

@@ -80,7 +80,8 @@ class KeypointBottomUpBaseDataset(DetDataset):
         records = copy.deepcopy(self._get_imganno(idx))
         records['image'] = cv2.imread(records['image_file'])
         records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
-        records['mask'] = (records['mask'] + 0).astype('uint8')
+        if 'mask' in records:
+            records['mask'] = (records['mask'] + 0).astype('uint8')
         records = self.transform(records)
         return records
 
@@ -135,24 +136,37 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
                  num_joints,
                  transform=[],
                  shard=[0, 1],
-                 test_mode=False):
+                 test_mode=False,
+                 return_mask=True,
+                 return_bbox=True,
+                 return_area=True,
+                 return_class=True):
         super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                          transform, shard, test_mode)
 
         self.ann_file = os.path.join(dataset_dir, anno_path)
         self.shard = shard
         self.test_mode = test_mode
+        self.return_mask = return_mask
+        self.return_bbox = return_bbox
+        self.return_area = return_area
+        self.return_class = return_class
 
     def parse_dataset(self):
         self.coco = COCO(self.ann_file)
 
         self.img_ids = self.coco.getImgIds()
         if not self.test_mode:
-            self.img_ids = [
-                img_id for img_id in self.img_ids
-                if len(self.coco.getAnnIds(
-                    imgIds=img_id, iscrowd=None)) > 0
-            ]
+            self.img_ids_tmp = []
+            for img_id in self.img_ids:
+                ann_ids = self.coco.getAnnIds(imgIds=img_id)
+                anno = self.coco.loadAnns(ann_ids)
+                anno = [obj for obj in anno if obj['iscrowd'] == 0]
+                if len(anno) == 0:
+                    continue
+                self.img_ids_tmp.append(img_id)
+            self.img_ids = self.img_ids_tmp
+
         blocknum = int(len(self.img_ids) / self.shard[1])
         self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
             self.shard[0] + 1))]
@@ -199,21 +213,31 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
         ann_ids = coco.getAnnIds(imgIds=img_id)
         anno = coco.loadAnns(ann_ids)
 
-        mask = self._get_mask(anno, idx)
         anno = [
             obj for obj in anno
-            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+            if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
         ]
 
+        db_rec = {}
         joints, orgsize = self._get_joints(anno, idx)
+        db_rec['gt_joints'] = joints
+        db_rec['im_shape'] = orgsize
+
+        if self.return_bbox:
+            db_rec['gt_bbox'] = self._get_bboxs(anno, idx)
+
+        if self.return_class:
+            db_rec['gt_class'] = self._get_labels(anno, idx)
+
+        if self.return_area:
+            db_rec['gt_areas'] = self._get_areas(anno, idx)
+
+        if self.return_mask:
+            db_rec['mask'] = self._get_mask(anno, idx)
 
-        db_rec = {}
         db_rec['im_id'] = img_id
         db_rec['image_file'] = os.path.join(self.img_prefix,
                                             self.id2name[img_id])
-        db_rec['mask'] = mask
-        db_rec['joints'] = joints
-        db_rec['im_shape'] = orgsize
 
         return db_rec
 
@@ -229,12 +253,41 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
                 np.array(obj['keypoints']).reshape([-1, 3])
 
         img_info = self.coco.loadImgs(self.img_ids[idx])[0]
-        joints[..., 0] /= img_info['width']
-        joints[..., 1] /= img_info['height']
-        orgsize = np.array([img_info['height'], img_info['width']])
+        orgsize = np.array([img_info['height'], img_info['width'], 1])
 
         return joints, orgsize
 
+    def _get_bboxs(self, anno, idx):
+        num_people = len(anno)
+        gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)
+
+        for idx, obj in enumerate(anno):
+            if 'bbox' in obj:
+                gt_bboxes[idx, :] = obj['bbox']
+
+        gt_bboxes[:, 2] += gt_bboxes[:, 0]
+        gt_bboxes[:, 3] += gt_bboxes[:, 1]
+        return gt_bboxes
+
+    def _get_labels(self, anno, idx):
+        num_people = len(anno)
+        gt_labels = np.zeros((num_people, 1), dtype=np.float32)
+
+        for idx, obj in enumerate(anno):
+            if 'category_id' in obj:
+                catid = obj['category_id']
+                gt_labels[idx, 0] = self.catid2clsid[catid]
+        return gt_labels
+
+    def _get_areas(self, anno, idx):
+        num_people = len(anno)
+        gt_areas = np.zeros((num_people, ), dtype=np.float32)
+
+        for idx, obj in enumerate(anno):
+            if 'area' in obj:
+                gt_areas[idx, ] = obj['area']
+        return gt_areas
+
     def _get_mask(self, anno, idx):
         """Get ignore masks to mask out losses."""
         coco = self.coco
@@ -487,9 +540,9 @@ class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
                     continue
                     continue
 
 
                 joints = np.zeros(
                 joints = np.zeros(
-                    (self.ann_info['num_joints'], 3), dtype=np.float)
+                    (self.ann_info['num_joints'], 3), dtype=np.float32)
                 joints_vis = np.zeros(
                 joints_vis = np.zeros(
-                    (self.ann_info['num_joints'], 3), dtype=np.float)
+                    (self.ann_info['num_joints'], 3), dtype=np.float32)
                 for ipt in range(self.ann_info['num_joints']):
                 for ipt in range(self.ann_info['num_joints']):
                     joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                     joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                     joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                     joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
@@ -506,7 +559,7 @@ class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
                     'image_file': os.path.join(self.img_prefix, file_name),
                     'image_file': os.path.join(self.img_prefix, file_name),
                     'center': center,
                     'center': center,
                     'scale': scale,
                     'scale': scale,
-                    'joints': joints,
+                    'gt_joints': joints,
                     'joints_vis': joints_vis,
                     'joints_vis': joints_vis,
                     'im_id': im_id,
                     'im_id': im_id,
                 })
                 })
@@ -560,16 +613,17 @@ class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
                 continue
                 continue
 
 
             center, scale = self._box2cs(box)
             center, scale = self._box2cs(box)
-            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
+            joints = np.zeros(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
             joints_vis = np.ones(
             joints_vis = np.ones(
-                (self.ann_info['num_joints'], 3), dtype=np.float)
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
             kpt_db.append({
             kpt_db.append({
                 'image_file': img_name,
                 'image_file': img_name,
                 'im_id': im_id,
                 'im_id': im_id,
                 'center': center,
                 'center': center,
                 'scale': scale,
                 'scale': scale,
                 'score': score,
                 'score': score,
-                'joints': joints,
+                'gt_joints': joints,
                 'joints_vis': joints_vis,
                 'joints_vis': joints_vis,
             })
             })
 
 
@@ -633,8 +687,8 @@ class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
             im_id = a['image_id'] if 'image_id' in a else int(
             im_id = a['image_id'] if 'image_id' in a else int(
                 os.path.splitext(image_name)[0])
                 os.path.splitext(image_name)[0])
 
 
-            c = np.array(a['center'], dtype=np.float)
-            s = np.array([a['scale'], a['scale']], dtype=np.float)
+            c = np.array(a['center'], dtype=np.float32)
+            s = np.array([a['scale'], a['scale']], dtype=np.float32)
 
 
             # Adjust center/scale slightly to avoid cropping limbs
             # Adjust center/scale slightly to avoid cropping limbs
             if c[0] != -1:
             if c[0] != -1:
@@ -642,11 +696,12 @@ class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
                 s = s * 1.25
                 s = s * 1.25
             c = c - 1
             c = c - 1
 
 
-            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
+            joints = np.zeros(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
             joints_vis = np.zeros(
             joints_vis = np.zeros(
-                (self.ann_info['num_joints'], 3), dtype=np.float)
-            if 'joints' in a:
-                joints_ = np.array(a['joints'])
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
+            if 'gt_joints' in a:
+                joints_ = np.array(a['gt_joints'])
                 joints_[:, 0:2] = joints_[:, 0:2] - 1
                 joints_[:, 0:2] = joints_[:, 0:2] - 1
                 joints_vis_ = np.array(a['joints_vis'])
                 joints_vis_ = np.array(a['joints_vis'])
                 assert len(joints_) == self.ann_info[
                 assert len(joints_) == self.ann_info[
@@ -662,7 +717,7 @@ class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
                 'im_id': im_id,
                 'im_id': im_id,
                 'center': c,
                 'center': c,
                 'scale': s,
                 'scale': s,
-                'joints': joints,
+                'gt_joints': joints,
                 'joints_vis': joints_vis
                 'joints_vis': joints_vis
             })
             })
         print("number length: {}".format(len(gt_db)))
         print("number length: {}".format(len(gt_db)))

+ 380 - 0
paddlers/models/ppdet/data/source/pose3d_cmb.py

@@ -0,0 +1,380 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import os
+import cv2
+import numpy as np
+import json
+import copy
+import pycocotools
+from pycocotools.coco import COCO
+from .dataset import DetDataset
+from paddlers.models.ppdet.core.workspace import register, serializable
+from paddle.io import Dataset
+
+
+@serializable
+class Pose3DDataset(DetDataset):
+    """Pose3D Dataset class. 
+
+    Args:
+        dataset_dir (str): Root path to the dataset.
+        anno_list (list of str): each element is a relative path to an annotation file.
+        image_dirs (list of str): each element is a relative path to a directory where images are held.
+        transform (composed(operators)): A sequence of data transforms.
+        test_mode (bool): Store True when building test or
+            validation dataset. Default: False.
+        24 joints order:
+        0-2: 'R_Ankle', 'R_Knee', 'R_Hip', 
+        3-5:'L_Hip', 'L_Knee', 'L_Ankle', 
+        6-8:'R_Wrist', 'R_Elbow', 'R_Shoulder', 
+        9-11:'L_Shoulder','L_Elbow','L_Wrist',
+        12-14:'Neck','Top_of_Head','Pelvis',
+        15-18:'Thorax','Spine','Jaw','Head',
+        19-23:'Nose','L_Eye','R_Eye','L_Ear','R_Ear'
+    """
+
+    def __init__(self,
+                 dataset_dir,
+                 image_dirs,
+                 anno_list,
+                 transform=[],
+                 num_joints=24,
+                 test_mode=False):
+        super().__init__(dataset_dir, image_dirs, anno_list)
+        self.image_info = {}
+        self.ann_info = {}
+        self.num_joints = num_joints
+
+        self.transform = transform
+        self.test_mode = test_mode
+
+        self.img_ids = []
+        self.dataset_dir = dataset_dir
+        self.image_dirs = image_dirs
+        self.anno_list = anno_list
+
+    def get_mask(self, mvm_percent=0.3):
+        num_joints = self.num_joints
+        mjm_mask = np.ones((num_joints, 1)).astype(np.float32)
+        if self.test_mode == False:
+            pb = np.random.random_sample()
+            masked_num = int(
+                pb * mvm_percent *
+                num_joints)  # at most x% of the joints could be masked
+            indices = np.random.choice(
+                np.arange(num_joints), replace=False, size=masked_num)
+            mjm_mask[indices, :] = 0.0
+        # return mjm_mask
+
+        num_joints = 10
+        mvm_mask = np.ones((num_joints, 1)).astype(np.float32)
+        if self.test_mode == False:
+            num_vertices = num_joints
+            pb = np.random.random_sample()
+            masked_num = int(
+                pb * mvm_percent *
+                num_vertices)  # at most x% of the vertices could be masked
+            indices = np.random.choice(
+                np.arange(num_vertices), replace=False, size=masked_num)
+            mvm_mask[indices, :] = 0.0
+
+        mjm_mask = np.concatenate([mjm_mask, mvm_mask], axis=0)
+        return mjm_mask
+
+    def filterjoints(self, x):
+        if self.num_joints == 24:
+            return x
+        elif self.num_joints == 14:
+            return x[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18], :]
+        elif self.num_joints == 17:
+            return x[
+                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 19], :]
+        else:
+            raise ValueError(
+                "unsupported joint numbers, only [24 or 17 or 14] is supported!")
+
+    def parse_dataset(self):
+        print("Loading annotations..., please wait")
+        self.annos = []
+        im_id = 0
+        self.human36m_num = 0
+        for idx, annof in enumerate(self.anno_list):
+            img_prefix = os.path.join(self.dataset_dir, self.image_dirs[idx])
+            dataf = os.path.join(self.dataset_dir, annof)
+            with open(dataf, 'r') as rf:
+                anno_data = json.load(rf)
+                annos = anno_data['data']
+                new_annos = []
+                print("{} has annos numbers: {}".format(dataf, len(annos)))
+                for anno in annos:
+                    new_anno = {}
+                    new_anno['im_id'] = im_id
+                    im_id += 1
+                    imagename = anno['imageName']
+                    if imagename.startswith("COCO_train2014_"):
+                        imagename = imagename[len("COCO_train2014_"):]
+                    elif imagename.startswith("COCO_val2014_"):
+                        imagename = imagename[len("COCO_val2014_"):]
+                    imagename = os.path.join(img_prefix, imagename)
+                    if not os.path.exists(imagename):
+                        if "train2017" in imagename:
+                            imagename = imagename.replace("train2017",
+                                                          "val2017")
+                            if not os.path.exists(imagename):
+                                print("cannot find imagepath:{}".format(
+                                    imagename))
+                                continue
+                        else:
+                            print("cannot find imagepath:{}".format(imagename))
+                            continue
+                    new_anno['imageName'] = imagename
+                    if 'human3.6m' in imagename:
+                        self.human36m_num += 1
+                    new_anno['bbox_center'] = anno['bbox_center']
+                    new_anno['bbox_scale'] = anno['bbox_scale']
+                    new_anno['joints_2d'] = np.array(anno[
+                        'gt_keypoint_2d']).astype(np.float32)
+                    if new_anno['joints_2d'].shape[0] == 49:
+                        # if joints_2d is in SPIN format (generated by eft), keep the last 24 public joints
+                        # for details, refer to: https://github.com/nkolot/SPIN/blob/master/constants.py
+                        new_anno['joints_2d'] = new_anno['joints_2d'][25:]
+                    new_anno['joints_3d'] = np.array(anno[
+                        'pose3d'])[:, :3].astype(np.float32)
+                    new_anno['mjm_mask'] = self.get_mask()
+                    if not 'has_3d_joints' in anno:
+                        new_anno['has_3d_joints'] = int(1)
+                        new_anno['has_2d_joints'] = int(1)
+                    else:
+                        new_anno['has_3d_joints'] = int(anno['has_3d_joints'])
+                        new_anno['has_2d_joints'] = int(anno['has_2d_joints'])
+                    new_anno['joints_2d'] = self.filterjoints(new_anno[
+                        'joints_2d'])
+                    self.annos.append(new_anno)
+                del annos
+
+    def get_temp_num(self):
+        """get temporal data number, like human3.6m"""
+        return self.human36m_num
+
+    def __len__(self):
+        """Get dataset length."""
+        return len(self.annos)
+
+    def _get_imganno(self, idx):
+        """Get anno for a single image."""
+        return self.annos[idx]
+
+    def __getitem__(self, idx):
+        """Prepare image for training given the index."""
+        records = copy.deepcopy(self._get_imganno(idx))
+        imgpath = records['imageName']
+        assert os.path.exists(imgpath), "cannot find image {}".format(imgpath)
+        records['image'] = cv2.imread(imgpath)
+        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
+        records = self.transform(records)
+        return records
+
+    def check_or_download_dataset(self):
+        alldatafind = True
+        for image_dir in self.image_dirs:
+            image_dir = os.path.join(self.dataset_dir, image_dir)
+            if not os.path.isdir(image_dir):
+                print("dataset [{}] is not found".format(image_dir))
+                alldatafind = False
+        if not alldatafind:
+            raise ValueError(
+                "Some dataset is not valid and cannot download automatically now, please prepare the dataset first"
+            )
+
+
+@register
+@serializable
+class Keypoint3DMultiFramesDataset(Dataset):
+    """24 keypoints 3D dataset for pose estimation. 
+
+    each item is a list of images
+
+    The dataset loads raw features and apply specified transforms
+    to return a dict containing the image tensors and other information.
+
+    Args:
+        dataset_dir (str): Root path to the dataset.
+        image_dir (str): Path to a directory where images are held.
+    """
+
+    def __init__(
+            self,
+            dataset_dir,  # dataset root directory
+            image_dir,  # image directory
+            p3d_dir,  # directory of 3D keypoint files
+            json_path,
+            img_size,  # image resize size
+            num_frames,  # length of the frame sequence
+            anno_path=None, ):
+
+        self.dataset_dir = dataset_dir
+        self.image_dir = image_dir
+        self.p3d_dir = p3d_dir
+        self.json_path = json_path
+        self.img_size = img_size
+        self.num_frames = num_frames
+        self.anno_path = anno_path
+
+        self.data_labels, self.mf_inds = self._generate_multi_frames_list()
+
+    def _generate_multi_frames_list(self):
+        act_list = os.listdir(self.dataset_dir)  # list of actions
+        count = 0
+        mf_list = []
+        annos_dict = {'images': [], 'annotations': [], 'act_inds': []}
+        for act in act_list:  # generate a frame sequence for each action
+            if '.' in act:
+                continue
+
+            json_path = os.path.join(self.dataset_dir, act, self.json_path)
+            with open(json_path, 'r') as j:
+                annos = json.load(j)
+            length = len(annos['images'])
+            for k, v in annos.items():
+                if k in annos_dict:
+                    annos_dict[k].extend(v)
+            annos_dict['act_inds'].extend([act] * length)
+
+            mf = [[i + j + count for j in range(self.num_frames)]
+                  for i in range(0, length - self.num_frames + 1)]
+            mf_list.extend(mf)
+            count += length
+
+        print("total data number:", len(mf_list))
+        return annos_dict, mf_list
+
+    def __call__(self, *args, **kwargs):
+        return self
+
+    def __getitem__(self, index):  # fetch one consecutive frame sequence
+        inds = self.mf_inds[
+            index]  # e.g. [568, 569, 570, 571, 572, 573], length is num_frames
+
+        images = self.data_labels['images']  # all images
+        annots = self.data_labels['annotations']  # all annots
+
+        act = self.data_labels['act_inds'][inds[0]]  # action name (folder name)
+
+        kps3d_list = []
+        kps3d_vis_list = []
+        names = []
+
+        h, w = 0, 0
+        for ind in inds:  # one image
+            height = float(images[ind]['height'])
+            width = float(images[ind]['width'])
+            name = images[ind]['file_name']  # image file name, including extension
+
+            kps3d_name = name.split('.')[0] + '.obj'
+            kps3d_path = os.path.join(self.dataset_dir, act, self.p3d_dir,
+                                      kps3d_name)
+
+            joints, joints_vis = self.kps3d_process(kps3d_path)
+            joints_vis = np.array(joints_vis, dtype=np.float32)
+
+            kps3d_list.append(joints)
+            kps3d_vis_list.append(joints_vis)
+            names.append(name)
+
+        kps3d = np.array(kps3d_list)  # (6, 24, 3),(num_frames, joints_num, 3)
+        kps3d_vis = np.array(kps3d_vis_list)
+
+        # read image
+        imgs = []
+        for name in names:
+            img_path = os.path.join(self.dataset_dir, act, self.image_dir, name)
+
+            image = cv2.imread(img_path, cv2.IMREAD_COLOR |
+                               cv2.IMREAD_IGNORE_ORIENTATION)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+            imgs.append(np.expand_dims(image, axis=0))
+
+        imgs = np.concatenate(imgs, axis=0)
+        imgs = imgs.astype(
+            np.float32)  # (6, 1080, 1920, 3),(num_frames, h, w, c)
+
+        # note: at this point the images and annotations are mirrored
+        records = {
+            'kps3d': kps3d,
+            'kps3d_vis': kps3d_vis,
+            "image": imgs,
+            'act': act,
+            'names': names,
+            'im_id': index
+        }
+
+        return self.transform(records)
+
+    def kps3d_process(self, kps3d_path):
+        count = 0
+        kps = []
+        kps_vis = []
+
+        with open(kps3d_path, 'r') as f:
+            lines = f.readlines()
+            for line in lines:
+                if line[0] == 'v':
+                    kps.append([])
+                    line = line.strip('\n').split(' ')[1:]
+                    for kp in line:
+                        kps[-1].append(float(kp))
+                    count += 1
+
+                    kps_vis.append([1, 1, 1])
+
+        kps = np.array(kps)  # 52,3
+        kps_vis = np.array(kps_vis)
+
+        kps *= 10  # scale points
+        kps -= kps[[0], :]  # set root point to zero
+
+        kps = np.concatenate((kps[0:23], kps[[37]]), axis=0)  # 24,3
+
+        kps *= 10
+
+        kps_vis = np.concatenate((kps_vis[0:23], kps_vis[[37]]), axis=0)  # 24,3
+
+        return kps, kps_vis
+
+    def __len__(self):
+        return len(self.mf_inds)
+
+    def get_anno(self):
+        if self.anno_path is None:
+            return
+        return os.path.join(self.dataset_dir, self.anno_path)
+
+    def check_or_download_dataset(self):
+        return
+
+    def parse_dataset(self, ):
+        return
+
+    def set_transform(self, transform):
+        self.transform = transform
+
+    def set_epoch(self, epoch_id):
+        self._epoch = epoch_id
+
+    def set_kwargs(self, **kwargs):
+        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
+        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
+        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
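A minimal, hypothetical usage sketch of the new Pose3DDataset follows; directory and file names are placeholders, and the transform is stubbed with an identity callable instead of a composed operator pipeline, so this only exercises annotation parsing, not training.

from paddlers.models.ppdet.data.source.pose3d_cmb import Pose3DDataset

dataset = Pose3DDataset(
    dataset_dir='data/pose3d',                                # hypothetical root
    image_dirs=['hr-lspet/images', 'human3.6m/images'],       # hypothetical sub-dirs
    anno_list=['hr-lspet/anno.json', 'human3.6m/anno.json'],  # hypothetical files
    transform=lambda records: records,                        # stand-in for real operators
    num_joints=24,
    test_mode=False)

dataset.check_or_download_dataset()  # verifies every image dir exists
dataset.parse_dataset()              # builds dataset.annos from the JSON files
print(len(dataset), dataset.get_temp_num())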

+ 2 - 0
paddlers/models/ppdet/data/transform/__init__.py

@@ -17,12 +17,14 @@ from . import batch_operators
 from . import keypoint_operators
 from . import keypoint_operators
 from . import mot_operators
 from . import mot_operators
 from . import rotated_operators
 from . import rotated_operators
+from . import keypoints_3d_operators
 
 
 from .operators import *
 from .operators import *
 from .batch_operators import *
 from .batch_operators import *
 from .keypoint_operators import *
 from .keypoint_operators import *
 from .mot_operators import *
 from .mot_operators import *
 from .rotated_operators import *
 from .rotated_operators import *
+from .keypoints_3d_operators import *
 
 
 __all__ = []
 __all__ = []
 __all__ += registered_ops
 __all__ += registered_ops

+ 159 - 7
paddlers/models/ppdet/data/transform/atss_assigner.py

@@ -43,7 +43,8 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
     Returns:
     Returns:
         Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
         Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
     """
     """
-    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
+    assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
+        mode)
     # Either the boxes are empty or the length of boxes' last dimension is 4
     # Either the boxes are empty or the length of boxes' last dimension is 4
     assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
     assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
     assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
     assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
@@ -83,6 +84,13 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
         if mode == 'giou':
         if mode == 'giou':
             enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
             enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
             enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
             enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
+        if mode == 'diou':
+            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
+            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
+            b1_x1, b1_y1 = bboxes1[..., 0], bboxes1[..., 1]
+            b1_x2, b1_y2 = bboxes1[..., 2], bboxes1[..., 3]
+            b2_x1, b2_y1 = bboxes2[..., 0], bboxes2[..., 1]
+            b2_x2, b2_y2 = bboxes2[..., 2], bboxes2[..., 3]
     else:
     else:
         lt = np.maximum(bboxes1[..., :, None, :2],
         lt = np.maximum(bboxes1[..., :, None, :2],
                         bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
                         bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
@@ -101,6 +109,15 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
                                      bboxes2[..., None, :, :2])
                                      bboxes2[..., None, :, :2])
             enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
             enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                      bboxes2[..., None, :, 2:])
                                      bboxes2[..., None, :, 2:])
+        if mode == 'diou':
+            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
+                                     bboxes2[..., None, :, :2])
+            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
+                                     bboxes2[..., None, :, 2:])
+            b1_x1, b1_y1 = bboxes1[..., :, None, 0], bboxes1[..., :, None, 1]
+            b1_x2, b1_y2 = bboxes1[..., :, None, 2], bboxes1[..., :, None, 3]
+            b2_x1, b2_y1 = bboxes2[..., None, :, 0], bboxes2[..., None, :, 1]
+            b2_x2, b2_y2 = bboxes2[..., None, :, 2], bboxes2[..., None, :, 3]
 
 
     eps = np.array([eps])
     eps = np.array([eps])
     union = np.maximum(union, eps)
     union = np.maximum(union, eps)
@@ -108,18 +125,32 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
     if mode in ['iou', 'iof']:
     if mode in ['iou', 'iof']:
         return ious
         return ious
     # calculate gious
     # calculate gious
-    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
-    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
-    enclose_area = np.maximum(enclose_area, eps)
-    gious = ious - (enclose_area - union) / enclose_area
-    return gious
+    if mode in ['giou']:
+        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
+        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
+        enclose_area = np.maximum(enclose_area, eps)
+        gious = ious - (enclose_area - union) / enclose_area
+        return gious
+    if mode in ['diou']:
+        left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
+        right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
+        rho2 = left + right
+        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
+        enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
+        enclose_c = np.maximum(enclose_c, eps)
+        dious = ious - rho2 / enclose_c
+        return dious
 
 
 
 
 def topk_(input, k, axis=1, largest=True):
 def topk_(input, k, axis=1, largest=True):
     x = -input if largest else input
     x = -input if largest else input
     if axis == 0:
     if axis == 0:
         row_index = np.arange(input.shape[1 - axis])
         row_index = np.arange(input.shape[1 - axis])
-        topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
+        if k == x.shape[0]:  # argpartition requires index < len(input)
+            topk_index = np.argpartition(x, k - 1, axis=axis)[0:k, :]
+        else:
+            topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
+
         topk_data = x[topk_index, row_index]
         topk_data = x[topk_index, row_index]
 
 
         topk_index_sort = np.argsort(topk_data, axis=axis)
         topk_index_sort = np.argsort(topk_data, axis=axis)
@@ -267,3 +298,124 @@ class ATSSAssigner(object):
                          -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
                          -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
 
 
         return assigned_gt_inds, max_overlaps
         return assigned_gt_inds, max_overlaps
+
+    def get_vlr_region(self,
+                       bboxes,
+                       num_level_bboxes,
+                       gt_bboxes,
+                       gt_bboxes_ignore=None,
+                       gt_labels=None):
+        """get vlr region for ld distillation.
+        Args:
+            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
+            num_level_bboxes (List): num of bboxes in each level
+            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
+            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
+                labelled as `ignored`, e.g., crowd boxes in COCO.
+            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
+        """
+        bboxes = bboxes[:, :4]
+
+        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
+
+        # compute iou between all bbox and gt
+        overlaps = bbox_overlaps(bboxes, gt_bboxes)
+
+        # compute diou between all bbox and gt
+        diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')
+
+        # assign 0 by default
+        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
+
+        vlr_region_iou = (assigned_gt_inds + 0).astype(np.float32)
+
+        if num_gt == 0 or num_bboxes == 0:
+            # No ground truth or boxes, return empty assignment
+            max_overlaps = np.zeros((num_bboxes, ))
+            if num_gt == 0:
+                # No truth, assign everything to background
+                assigned_gt_inds[:] = 0
+            if not np.any(gt_labels):
+                assigned_labels = None
+            else:
+                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
+            return assigned_gt_inds, max_overlaps
+
+        # compute center distance between all bbox and gt
+        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
+        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
+        gt_points = np.stack((gt_cx, gt_cy), axis=1)
+
+        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
+        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
+        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
+
+        distances = np.sqrt(
+            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
+            .sum(-1))
+
+        # Selecting candidates based on the center distance
+        candidate_idxs = []
+        candidate_idxs_t = []
+        start_idx = 0
+        for bboxes_per_level in num_level_bboxes:
+            # on each pyramid level, for each gt,
+            # select k bbox whose center are closest to the gt center
+            end_idx = start_idx + bboxes_per_level
+            distances_per_level = distances[start_idx:end_idx, :]
+            selectable_t = min(self.topk, bboxes_per_level)
+            selectable_k = bboxes_per_level  #k for all
+            _, topt_idxs_per_level = topk_(
+                distances_per_level, selectable_t, axis=0, largest=False)
+            _, topk_idxs_per_level = topk_(
+                distances_per_level, selectable_k, axis=0, largest=False)
+            candidate_idxs_t.append(topt_idxs_per_level + start_idx)
+            candidate_idxs.append(topk_idxs_per_level + start_idx)
+            start_idx = end_idx
+
+        candidate_idxs_t = np.concatenate(candidate_idxs_t, axis=0)
+        candidate_idxs = np.concatenate(candidate_idxs, axis=0)
+
+        # get corresponding iou for these candidates, and compute the
+        # mean and std, set mean + std as the iou threshold
+        candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]
+
+        # compute tdiou
+        t_diou = diou[candidate_idxs, np.arange(num_gt)]
+
+        overlaps_mean_per_gt = candidate_overlaps_t.mean(0)
+        overlaps_std_per_gt = candidate_overlaps_t.std(
+            0, ddof=1)  # NOTE: use Bessel correction
+        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
+
+        # compute region        
+        is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
+            t_diou >= 0.25 * overlaps_thr_per_gt[None, :])
+
+        # limit the positive sample's center in gt
+        for gt_idx in range(num_gt):
+            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
+
+        candidate_idxs = candidate_idxs.reshape(-1)
+
+        # if an anchor box is assigned to multiple gts,
+        # the one with the highest IoU will be selected.
+        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
+        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
+
+        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
+        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
+
+        max_overlaps = overlaps_inf.max(axis=1)
+        argmax_overlaps = overlaps_inf.argmax(axis=1)
+
+        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
+        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
+
+        assigned_gt_inds[max_overlaps !=
+                         -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
+
+        vlr_region_iou[max_overlaps !=
+                       -np.inf] = max_overlaps[max_overlaps != -np.inf] + 0
+
+        return vlr_region_iou
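As a quick sanity check of the new 'diou' mode, the standalone NumPy sketch below recomputes DIoU for a single pair of axis-aligned boxes using the same formula as above (IoU minus squared center distance over the squared diagonal of the enclosing box); it is illustration only, not part of the patch.

import numpy as np

def diou_single(b1, b2, eps=1e-6):
    # b1, b2 are [x1, y1, x2, y2]
    iw = max(min(b1[2], b2[2]) - max(b1[0], b2[0]), 0.)
    ih = max(min(b1[3], b2[3]) - max(b1[1], b2[1]), 0.)
    inter = iw * ih
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    iou = inter / max(area1 + area2 - inter, eps)
    # squared distance between the two box centers
    rho2 = ((b2[0] + b2[2]) - (b1[0] + b1[2]))**2 / 4 \
         + ((b2[1] + b2[3]) - (b1[1] + b1[3]))**2 / 4
    # squared diagonal of the smallest enclosing box
    cw = max(b1[2], b2[2]) - min(b1[0], b2[0])
    ch = max(b1[3], b2[3]) - min(b1[1], b2[1])
    c2 = max(cw**2 + ch**2, eps)
    return iou - rho2 / c2

print(diou_single(np.array([0., 0., 10., 10.]), np.array([5., 5., 15., 15.])))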

+ 359 - 42
paddlers/models/ppdet/data/transform/batch_operators.py

@@ -24,6 +24,7 @@ except Exception:
     from collections import Sequence
     from collections import Sequence
 
 
 import cv2
 import cv2
+import copy
 import math
 import math
 import numpy as np
 import numpy as np
 from .operators import register_op, BaseOperator, Resize
 from .operators import register_op, BaseOperator, Resize
@@ -43,10 +44,11 @@ __all__ = [
     'Gt2FCOSTarget',
     'Gt2FCOSTarget',
     'Gt2TTFTarget',
     'Gt2TTFTarget',
     'Gt2Solov2Target',
     'Gt2Solov2Target',
-    'Gt2SparseRCNNTarget',
+    'Gt2SparseTarget',
     'PadMaskBatch',
     'PadMaskBatch',
     'Gt2GFLTarget',
     'Gt2GFLTarget',
     'Gt2CenterNetTarget',
     'Gt2CenterNetTarget',
+    'Gt2CenterTrackTarget',
     'PadGT',
     'PadGT',
     'PadRGT',
     'PadRGT',
 ]
 ]
@@ -169,6 +171,7 @@ class BatchRandomResize(BaseOperator):
 
 
 @register_op
 @register_op
 class Gt2YoloTarget(BaseOperator):
 class Gt2YoloTarget(BaseOperator):
+    __shared__ = ['num_classes']
     """
     """
     Generate YOLOv3 targets by groud truth data, this operator is only used in
     Generate YOLOv3 targets by ground truth data, this operator is only used in
     Generate YOLOv3 targets by ground truth data, this operator is only used in
     fine grained YOLOv3 loss mode
@@ -292,7 +295,9 @@ class Gt2FCOSTarget(BaseOperator):
                  object_sizes_boundary,
                  object_sizes_boundary,
                  center_sampling_radius,
                  center_sampling_radius,
                  downsample_ratios,
                  downsample_ratios,
-                 norm_reg_targets=False):
+                 num_shift=0.5,
+                 multiply_strides_reg_targets=False,
+                 norm_reg_targets=True):
         super(Gt2FCOSTarget, self).__init__()
         super(Gt2FCOSTarget, self).__init__()
         self.center_sampling_radius = center_sampling_radius
         self.center_sampling_radius = center_sampling_radius
         self.downsample_ratios = downsample_ratios
         self.downsample_ratios = downsample_ratios
@@ -304,6 +309,8 @@ class Gt2FCOSTarget(BaseOperator):
                 self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
                 self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
             ])
             ])
         self.object_sizes_of_interest = object_sizes_of_interest
         self.object_sizes_of_interest = object_sizes_of_interest
+        self.num_shift = num_shift
+        self.multiply_strides_reg_targets = multiply_strides_reg_targets
         self.norm_reg_targets = norm_reg_targets
         self.norm_reg_targets = norm_reg_targets
 
 
     def _compute_points(self, w, h):
     def _compute_points(self, w, h):
@@ -320,7 +327,8 @@ class Gt2FCOSTarget(BaseOperator):
             shift_x, shift_y = np.meshgrid(shift_x, shift_y)
             shift_x, shift_y = np.meshgrid(shift_x, shift_y)
             shift_x = shift_x.flatten()
             shift_x = shift_x.flatten()
             shift_y = shift_y.flatten()
             shift_y = shift_y.flatten()
-            location = np.stack([shift_x, shift_y], axis=1) + stride // 2
+            location = np.stack(
+                [shift_x, shift_y], axis=1) + stride * self.num_shift
             locations.append(location)
             locations.append(location)
         num_points_each_level = [len(location) for location in locations]
         num_points_each_level = [len(location) for location in locations]
         locations = np.concatenate(locations, axis=0)
         locations = np.concatenate(locations, axis=0)
@@ -459,11 +467,16 @@ class Gt2FCOSTarget(BaseOperator):
                 grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
                 grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
                 grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
                 grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
                 if self.norm_reg_targets:
                 if self.norm_reg_targets:
-                    sample['reg_target{}'.format(lvl)] = \
-                        np.reshape(
-                            reg_targets_by_level[lvl] / \
-                            self.downsample_ratios[lvl],
+                    if self.multiply_strides_reg_targets:
+                        sample['reg_target{}'.format(lvl)] = np.reshape(
+                            reg_targets_by_level[lvl],
                             newshape=[grid_h, grid_w, 4])
                             newshape=[grid_h, grid_w, 4])
+                    else:
+                        sample['reg_target{}'.format(lvl)] = \
+                            np.reshape(
+                                reg_targets_by_level[lvl] / \
+                                self.downsample_ratios[lvl],
+                                newshape=[grid_h, grid_w, 4])
                 else:
                 else:
                     sample['reg_target{}'.format(lvl)] = np.reshape(
                     sample['reg_target{}'.format(lvl)] = np.reshape(
                         reg_targets_by_level[lvl],
                         reg_targets_by_level[lvl],
@@ -482,6 +495,7 @@ class Gt2FCOSTarget(BaseOperator):
 
 
 @register_op
 @register_op
 class Gt2GFLTarget(BaseOperator):
 class Gt2GFLTarget(BaseOperator):
+    __shared__ = ['num_classes']
     """
     """
     Generate GFocal loss targets by ground truth data
     Generate GFocal loss targets by ground truth data
     """
     """
@@ -490,12 +504,14 @@ class Gt2GFLTarget(BaseOperator):
                  num_classes=80,
                  num_classes=80,
                  downsample_ratios=[8, 16, 32, 64, 128],
                  downsample_ratios=[8, 16, 32, 64, 128],
                  grid_cell_scale=4,
                  grid_cell_scale=4,
-                 cell_offset=0):
+                 cell_offset=0,
+                 compute_vlr_region=False):
         super(Gt2GFLTarget, self).__init__()
         super(Gt2GFLTarget, self).__init__()
         self.num_classes = num_classes
         self.num_classes = num_classes
         self.downsample_ratios = downsample_ratios
         self.downsample_ratios = downsample_ratios
         self.grid_cell_scale = grid_cell_scale
         self.grid_cell_scale = grid_cell_scale
         self.cell_offset = cell_offset
         self.cell_offset = cell_offset
+        self.compute_vlr_region = compute_vlr_region
 
 
         self.assigner = ATSSAssigner()
         self.assigner = ATSSAssigner()
 
 
@@ -574,6 +590,13 @@ class Gt2GFLTarget(BaseOperator):
             assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
             assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
                                               gt_bboxes, gt_bboxes_ignore,
                                               gt_bboxes, gt_bboxes_ignore,
                                               gt_labels)
                                               gt_labels)
+
+            if self.compute_vlr_region:
+                vlr_region = self.assigner.get_vlr_region(
+                    grid_cells, num_level_cells, gt_bboxes, gt_bboxes_ignore,
+                    gt_labels)
+                sample['vlr_regions'] = vlr_region
+
             pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
             pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
                 assign_gt_inds, gt_bboxes)
                 assign_gt_inds, gt_bboxes)
 
 
@@ -766,7 +789,7 @@ class Gt2Solov2Target(BaseOperator):
                 ins_label = []
                 ins_label = []
                 grid_order = []
                 grid_order = []
                 cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
                 cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
-                ins_ind_label = np.zeros([num_grid**2], dtype=np.bool)
+                ins_ind_label = np.zeros([num_grid**2], dtype=np.bool_)
 
 
                 if num_ins == 0:
                 if num_ins == 0:
                     ins_label = np.zeros(
                     ins_label = np.zeros(
@@ -893,27 +916,33 @@ class Gt2Solov2Target(BaseOperator):
 
 
 
 
 @register_op
 @register_op
-class Gt2SparseRCNNTarget(BaseOperator):
-    '''
-    Generate SparseRCNN targets by groud truth data
-    '''
-
-    def __init__(self):
-        super(Gt2SparseRCNNTarget, self).__init__()
+class Gt2SparseTarget(BaseOperator):
+    def __init__(self, use_padding_shape=False):
+        super(Gt2SparseTarget, self).__init__()
+        self.use_padding_shape = use_padding_shape
 
 
     def __call__(self, samples, context=None):
     def __call__(self, samples, context=None):
         for sample in samples:
         for sample in samples:
-            im = sample["image"]
-            h, w = im.shape[1:3]
-            img_whwh = np.array([w, h, w, h], dtype=np.int32)
-            sample["img_whwh"] = img_whwh
-            if "scale_factor" in sample:
-                sample["scale_factor_wh"] = np.array(
-                    [sample["scale_factor"][1], sample["scale_factor"][0]],
-                    dtype=np.float32)
+            ori_h, ori_w = sample['h'], sample['w']
+            if self.use_padding_shape:
+                h, w = sample["image"].shape[1:3]
+                if "scale_factor" in sample:
+                    sf_w, sf_h = sample["scale_factor"][1], sample[
+                        "scale_factor"][0]
+                    sample["scale_factor_whwh"] = np.array(
+                        [sf_w, sf_h, sf_w, sf_h], dtype=np.float32)
+                else:
+                    sample["scale_factor_whwh"] = np.array(
+                        [1.0, 1.0, 1.0, 1.0], dtype=np.float32)
             else:
             else:
-                sample["scale_factor_wh"] = np.array(
-                    [1.0, 1.0], dtype=np.float32)
+                h, w = round(sample['im_shape'][0]), round(sample['im_shape'][
+                    1])
+                sample["scale_factor_whwh"] = np.array(
+                    [w / ori_w, h / ori_h, w / ori_w, h / ori_h],
+                    dtype=np.float32)
+
+            sample["img_whwh"] = np.array([w, h, w, h], dtype=np.float32)
+            sample["ori_shape"] = np.array([ori_h, ori_w], dtype=np.int32)
 
 
         return samples
         return samples
 
 
@@ -981,6 +1010,7 @@ class PadMaskBatch(BaseOperator):
 
 
 @register_op
 @register_op
 class Gt2CenterNetTarget(BaseOperator):
 class Gt2CenterNetTarget(BaseOperator):
+    __shared__ = ['num_classes']
     """Gt2CenterNetTarget
     """Gt2CenterNetTarget
     Generate CenterNet targets by ground-truth
     Generate CenterNet targets by ground-truth
     Args:
     Args:
@@ -990,40 +1020,39 @@ class Gt2CenterNetTarget(BaseOperator):
         max_objs (int): The maximum objects detected, 128 by default.
         max_objs (int): The maximum objects detected, 128 by default.
     """
     """
 
 
-    def __init__(self, down_ratio, num_classes=80, max_objs=128):
+    def __init__(self, num_classes=80, down_ratio=4, max_objs=128):
         super(Gt2CenterNetTarget, self).__init__()
         super(Gt2CenterNetTarget, self).__init__()
+        self.nc = num_classes
         self.down_ratio = down_ratio
         self.down_ratio = down_ratio
-        self.num_classes = num_classes
         self.max_objs = max_objs
         self.max_objs = max_objs
 
 
     def __call__(self, sample, context=None):
     def __call__(self, sample, context=None):
         input_h, input_w = sample['image'].shape[1:]
         input_h, input_w = sample['image'].shape[1:]
         output_h = input_h // self.down_ratio
         output_h = input_h // self.down_ratio
         output_w = input_w // self.down_ratio
         output_w = input_w // self.down_ratio
-        num_classes = self.num_classes
-        c = sample['center']
-        s = sample['scale']
         gt_bbox = sample['gt_bbox']
         gt_bbox = sample['gt_bbox']
         gt_class = sample['gt_class']
         gt_class = sample['gt_class']
 
 
-        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
+        hm = np.zeros((self.nc, output_h, output_w), dtype=np.float32)
         wh = np.zeros((self.max_objs, 2), dtype=np.float32)
         wh = np.zeros((self.max_objs, 2), dtype=np.float32)
-        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
         reg = np.zeros((self.max_objs, 2), dtype=np.float32)
         reg = np.zeros((self.max_objs, 2), dtype=np.float32)
         ind = np.zeros((self.max_objs), dtype=np.int64)
         ind = np.zeros((self.max_objs), dtype=np.int64)
         reg_mask = np.zeros((self.max_objs), dtype=np.int32)
         reg_mask = np.zeros((self.max_objs), dtype=np.int32)
-        cat_spec_wh = np.zeros(
-            (self.max_objs, num_classes * 2), dtype=np.float32)
-        cat_spec_mask = np.zeros(
-            (self.max_objs, num_classes * 2), dtype=np.int32)
+        cat_spec_wh = np.zeros((self.max_objs, self.nc * 2), dtype=np.float32)
+        cat_spec_mask = np.zeros((self.max_objs, self.nc * 2), dtype=np.int32)
 
 
-        trans_output = get_affine_transform(c, [s, s], 0, [output_w, output_h])
+        trans_output = get_affine_transform(
+            center=sample['center'],
+            input_size=[sample['scale'], sample['scale']],
+            rot=0,
+            output_size=[output_w, output_h])
 
 
         gt_det = []
         gt_det = []
         for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
         for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
             cls = int(cls)
             cls = int(cls)
             bbox[:2] = affine_transform(bbox[:2], trans_output)
             bbox[:2] = affine_transform(bbox[:2], trans_output)
             bbox[2:] = affine_transform(bbox[2:], trans_output)
             bbox[2:] = affine_transform(bbox[2:], trans_output)
+            bbox_amodal = copy.deepcopy(bbox)
             bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
             bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
             bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
             bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
             h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
             h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
@@ -1034,10 +1063,12 @@ class Gt2CenterNetTarget(BaseOperator):
                     [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                     [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                     dtype=np.float32)
                     dtype=np.float32)
                 ct_int = ct.astype(np.int32)
                 ct_int = ct.astype(np.int32)
+
+                # get hm,wh,reg,ind,ind_mask
                 draw_umich_gaussian(hm[cls], ct_int, radius)
                 draw_umich_gaussian(hm[cls], ct_int, radius)
                 wh[i] = 1. * w, 1. * h
                 wh[i] = 1. * w, 1. * h
-                ind[i] = ct_int[1] * output_w + ct_int[0]
                 reg[i] = ct - ct_int
                 reg[i] = ct - ct_int
+                ind[i] = ct_int[1] * output_w + ct_int[0]
                 reg_mask[i] = 1
                 reg_mask[i] = 1
                 cat_spec_wh[i, cls * 2:cls * 2 + 2] = wh[i]
                 cat_spec_wh[i, cls * 2:cls * 2 + 2] = wh[i]
                 cat_spec_mask[i, cls * 2:cls * 2 + 2] = 1
                 cat_spec_mask[i, cls * 2:cls * 2 + 2] = 1
@@ -1052,9 +1083,10 @@ class Gt2CenterNetTarget(BaseOperator):
         sample.pop('scale', None)
         sample.pop('scale', None)
         sample.pop('is_crowd', None)
         sample.pop('is_crowd', None)
         sample.pop('difficult', None)
         sample.pop('difficult', None)
-        sample['heatmap'] = hm
-        sample['index_mask'] = reg_mask
+
         sample['index'] = ind
         sample['index'] = ind
+        sample['index_mask'] = reg_mask
+        sample['heatmap'] = hm
         sample['size'] = wh
         sample['size'] = wh
         sample['offset'] = reg
         sample['offset'] = reg
         return sample
         return sample
@@ -1070,13 +1102,115 @@ class PadGT(BaseOperator):
                                 1 means bbox, 0 means no bbox.
                                 1 means bbox, 0 means no bbox.
     """
     """
 
 
-    def __init__(self, return_gt_mask=True):
+    def __init__(self, return_gt_mask=True, pad_img=False, minimum_gtnum=0):
         super(PadGT, self).__init__()
         super(PadGT, self).__init__()
         self.return_gt_mask = return_gt_mask
         self.return_gt_mask = return_gt_mask
+        self.pad_img = pad_img
+        self.minimum_gtnum = minimum_gtnum
+
+    def _impad(self, img: np.ndarray,
+            *,
+            shape = None,
+            padding = None,
+            pad_val = 0,
+            padding_mode = 'constant') -> np.ndarray:
+        """Pad the given image to a certain shape or pad on all sides with
+        specified padding mode and padding value.
+
+        Args:
+            img (ndarray): Image to be padded.
+            shape (tuple[int]): Expected padding shape (h, w). Default: None.
+            padding (int or tuple[int]): Padding on each border. If a single int is
+                provided this is used to pad all borders. If tuple of length 2 is
+                provided this is the padding on left/right and top/bottom
+                respectively. If a tuple of length 4 is provided this is the
+                padding for the left, top, right and bottom borders respectively.
+                Default: None. Note that `shape` and `padding` can not be both
+                set.
+            pad_val (Number | Sequence[Number]): Values to be filled in padding
+                areas when padding_mode is 'constant'. Default: 0.
+            padding_mode (str): Type of padding. Should be: constant, edge,
+                reflect or symmetric. Default: constant.
+                - constant: pads with a constant value, this value is specified
+                with pad_val.
+                - edge: pads with the last value at the edge of the image.
+                - reflect: pads with reflection of image without repeating the last
+                value on the edge. For example, padding [1, 2, 3, 4] with 2
+                elements on both sides in reflect mode will result in
+                [3, 2, 1, 2, 3, 4, 3, 2].
+                - symmetric: pads with reflection of image repeating the last value
+                on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
+                both sides in symmetric mode will result in
+                [2, 1, 1, 2, 3, 4, 4, 3]
+
+        Returns:
+            ndarray: The padded image.
+        """
+
+        assert (shape is not None) ^ (padding is not None)
+        if shape is not None:
+            width = max(shape[1] - img.shape[1], 0)
+            height = max(shape[0] - img.shape[0], 0)
+            padding = (0, 0, int(width), int(height))
+
+        # check pad_val
+        import numbers
+        if isinstance(pad_val, tuple):
+            assert len(pad_val) == img.shape[-1]
+        elif not isinstance(pad_val, numbers.Number):
+            raise TypeError('pad_val must be a int or a tuple. '
+                            f'But received {type(pad_val)}')
+
+        # check padding
+        if isinstance(padding, tuple) and len(padding) in [2, 4]:
+            if len(padding) == 2:
+                padding = (padding[0], padding[1], padding[0], padding[1])
+        elif isinstance(padding, numbers.Number):
+            padding = (padding, padding, padding, padding)
+        else:
+            raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
+                            f'But received {padding}')
+
+        # check padding mode
+        assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
+
+        border_type = {
+            'constant': cv2.BORDER_CONSTANT,
+            'edge': cv2.BORDER_REPLICATE,
+            'reflect': cv2.BORDER_REFLECT_101,
+            'symmetric': cv2.BORDER_REFLECT
+        }
+        img = cv2.copyMakeBorder(
+            img,
+            padding[1],
+            padding[3],
+            padding[0],
+            padding[2],
+            border_type[padding_mode],
+            value=pad_val)
+
+        return img
+
+    def checkmaxshape(self, samples):
+        maxh, maxw = 0, 0
+        for sample in samples:
+            h,w = sample['im_shape']
+            if h>maxh:
+                maxh = h
+            if w>maxw:
+                maxw = w
+        return (maxh, maxw)
 
 
     def __call__(self, samples, context=None):
     def __call__(self, samples, context=None):
         num_max_boxes = max([len(s['gt_bbox']) for s in samples])
         num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+        num_max_boxes = max(self.minimum_gtnum, num_max_boxes)
+        if self.pad_img:
+            maxshape = self.checkmaxshape(samples)
         for sample in samples:
         for sample in samples:
+            if self.pad_img:
+                img = sample['image']
+                padimg = self._impad(img, shape=maxshape)
+                sample['image'] = padimg
             if self.return_gt_mask:
             if self.return_gt_mask:
                 sample['pad_gt_mask'] = np.zeros(
                 sample['pad_gt_mask'] = np.zeros(
                     (num_max_boxes, 1), dtype=np.float32)
                     (num_max_boxes, 1), dtype=np.float32)
@@ -1110,6 +1244,17 @@ class PadGT(BaseOperator):
                 if num_gt > 0:
                 if num_gt > 0:
                     pad_diff[:num_gt] = sample['difficult']
                     pad_diff[:num_gt] = sample['difficult']
                 sample['difficult'] = pad_diff
                 sample['difficult'] = pad_diff
+            if 'gt_joints' in sample:
+                num_joints = sample['gt_joints'].shape[1]
+                pad_gt_joints = np.zeros((num_max_boxes, num_joints, 3), dtype=np.float32)
+                if num_gt > 0:
+                    pad_gt_joints[:num_gt] = sample['gt_joints']
+                sample['gt_joints'] = pad_gt_joints
+            if 'gt_areas' in sample:
+                pad_gt_areas = np.zeros((num_max_boxes, 1), dtype=np.float32)
+                if num_gt > 0:
+                    pad_gt_areas[:num_gt, 0] = sample['gt_areas']
+                sample['gt_areas'] = pad_gt_areas
         return samples
         return samples
 
 
 
 
@@ -1165,3 +1310,175 @@ class PadRGT(BaseOperator):
                                num_gt)
                                num_gt)
 
 
         return samples
         return samples
+
+
+@register_op
+class Gt2CenterTrackTarget(BaseOperator):
+    __shared__ = ['num_classes']
+    """Gt2CenterTrackTarget
+    Generate CenterTrack targets by ground-truth
+    Args:
+        num_classes (int): The number of classes, 1 by default.
+        down_ratio (int): The down sample ratio between output feature and 
+                          input image.
+        max_objs (int): The maximum objects detected, 256 by default.
+    """
+
+    def __init__(self,
+                 num_classes=1,
+                 down_ratio=4,
+                 max_objs=256,
+                 hm_disturb=0.05,
+                 lost_disturb=0.4,
+                 fp_disturb=0.1,
+                 pre_hm=True,
+                 add_tracking=True,
+                 add_ltrb_amodal=True):
+        super(Gt2CenterTrackTarget, self).__init__()
+        self.nc = num_classes
+        self.down_ratio = down_ratio
+        self.max_objs = max_objs
+
+        self.hm_disturb = hm_disturb
+        self.lost_disturb = lost_disturb
+        self.fp_disturb = fp_disturb
+        self.pre_hm = pre_hm
+        self.add_tracking = add_tracking
+        self.add_ltrb_amodal = add_ltrb_amodal
+
+    def _get_pre_dets(self, input_h, input_w, trans_input_pre, gt_bbox_pre,
+                      gt_class_pre, gt_track_id_pre):
+        hm_h, hm_w = input_h, input_w
+        reutrn_hm = self.pre_hm
+        pre_hm = np.zeros(
+            (1, hm_h, hm_w), dtype=np.float32) if reutrn_hm else None
+        pre_cts, track_ids = [], []
+
+        for i, (
+                bbox, cls, track_id
+        ) in enumerate(zip(gt_bbox_pre, gt_class_pre, gt_track_id_pre)):
+            cls = int(cls)
+            bbox[:2] = affine_transform(bbox[:2], trans_input_pre)
+            bbox[2:] = affine_transform(bbox[2:], trans_input_pre)
+            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
+            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
+            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
+            max_rad = 1
+            if (h > 0 and w > 0):
+                radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
+                radius = max(0, int(radius))
+                max_rad = max(max_rad, radius)
+                ct = np.array(
+                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
+                    dtype=np.float32)
+                ct0 = ct.copy()
+                conf = 1
+
+                ct[0] = ct[0] + np.random.randn() * self.hm_disturb * w
+                ct[1] = ct[1] + np.random.randn() * self.hm_disturb * h
+                conf = 1 if np.random.rand() > self.lost_disturb else 0
+
+                ct_int = ct.astype(np.int32)
+                if conf == 0:
+                    pre_cts.append(ct / self.down_ratio)
+                else:
+                    pre_cts.append(ct0 / self.down_ratio)
+
+                track_ids.append(track_id)
+                if reutrn_hm:
+                    draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)
+
+                if np.random.rand() < self.fp_disturb and reutrn_hm:
+                    ct2 = ct0.copy()
+                    # Hard code heatmap disturb ratio, haven't tried other numbers.
+                    ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
+                    ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
+                    ct2_int = ct2.astype(np.int32)
+                    draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)
+        return pre_hm, pre_cts, track_ids
+
+    def __call__(self, sample, context=None):
+        input_h, input_w = sample['image'].shape[1:]
+        output_h = input_h // self.down_ratio
+        output_w = input_w // self.down_ratio
+        gt_bbox = sample['gt_bbox']
+        gt_class = sample['gt_class']
+
+        # init
+        hm = np.zeros((self.nc, output_h, output_w), dtype=np.float32)
+        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
+        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
+        ind = np.zeros((self.max_objs), dtype=np.int64)
+        reg_mask = np.zeros((self.max_objs), dtype=np.int32)
+        if self.add_tracking:
+            tr = np.zeros((self.max_objs, 2), dtype=np.float32)
+        if self.add_ltrb_amodal:
+            ltrb_amodal = np.zeros((self.max_objs, 4), dtype=np.float32)
+
+        trans_output = get_affine_transform(
+            center=sample['center'],
+            input_size=[sample['scale'], sample['scale']],
+            rot=0,
+            output_size=[output_w, output_h])
+
+        pre_hm, pre_cts, track_ids = self._get_pre_dets(
+            input_h, input_w, sample['trans_input'], sample['pre_gt_bbox'],
+            sample['pre_gt_class'], sample['pre_gt_track_id'])
+
+        for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
+            cls = int(cls)
+            rect = np.array(
+                [[bbox[0], bbox[1]], [bbox[0], bbox[3]], [bbox[2], bbox[3]],
+                 [bbox[2], bbox[1]]],
+                dtype=np.float32)
+            for t in range(4):
+                rect[t] = affine_transform(rect[t], trans_output)
+                bbox[:2] = rect[:, 0].min(), rect[:, 1].min()
+                bbox[2:] = rect[:, 0].max(), rect[:, 1].max()
+
+            bbox_amodal = copy.deepcopy(bbox)
+            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
+            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
+
+            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
+            if h > 0 and w > 0:
+                radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
+                radius = max(0, int(radius))
+                ct = np.array(
+                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
+                    dtype=np.float32)
+                ct_int = ct.astype(np.int32)
+
+                # get hm,wh,reg,ind,ind_mask
+                draw_umich_gaussian(hm[cls], ct_int, radius)
+                wh[i] = 1. * w, 1. * h
+                reg[i] = ct - ct_int
+                ind[i] = ct_int[1] * output_w + ct_int[0]
+                reg_mask[i] = 1
+                if self.add_tracking:
+                    if sample['gt_track_id'][i] in track_ids:
+                        pre_ct = pre_cts[track_ids.index(sample['gt_track_id'][
+                            i])]
+                        tr[i] = pre_ct - ct_int
+
+                if self.add_ltrb_amodal:
+                    ltrb_amodal[i] = \
+                        bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
+                        bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
+
+        new_sample = {'image': sample['image']}
+        new_sample['index'] = ind
+        new_sample['index_mask'] = reg_mask
+        new_sample['heatmap'] = hm
+        new_sample['size'] = wh
+        new_sample['offset'] = reg
+        if self.add_tracking:
+            new_sample['tracking'] = tr
+        if self.add_ltrb_amodal:
+            new_sample['ltrb_amodal'] = ltrb_amodal
+
+        new_sample['pre_image'] = sample['pre_image']
+        new_sample['pre_hm'] = pre_hm
+
+        del sample
+        return new_sample
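The op above builds CenterNet/CenterTrack-style training targets: each ground-truth box contributes a Gaussian peak on its class heatmap plus size and offset regression values at the peak index. As a rough illustration (not part of the diff), the snippet below sketches the Gaussian splatting step with a simplified stand-in for ppdet's draw_umich_gaussian:

import numpy as np

def draw_gaussian(heatmap, center, radius):
    # Simplified stand-in for draw_umich_gaussian: splat an unnormalized
    # Gaussian of the given radius onto `heatmap` at the integer center,
    # keeping the element-wise maximum where peaks overlap.
    diameter = 2 * radius + 1
    sigma = diameter / 6.0
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

    cx, cy = int(center[0]), int(center[1])
    h, w = heatmap.shape
    left, right = min(cx, radius), min(w - cx, radius + 1)
    top, bottom = min(cy, radius), min(h - cy, radius + 1)
    region = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    patch = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    np.maximum(region, patch, out=region)  # writes through the view into heatmap
    return heatmap

hm = np.zeros((128, 128), dtype=np.float32)   # one class channel, output_h x output_w
draw_gaussian(hm, center=(32, 48), radius=4)  # peak of 1.0 at (x=32, y=48)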

+ 832 - 85
paddlers/models/ppdet/data/transform/keypoint_operators.py

@@ -36,19 +36,12 @@ logger = setup_logger(__name__)
 registered_ops = []
 
 __all__ = [
-    'RandomAffine',
-    'KeyPointFlip',
-    'TagGenerate',
-    'ToHeatmaps',
-    'NormalizePermute',
-    'EvalAffine',
-    'RandomFlipHalfBodyTransform',
-    'TopDownAffine',
-    'ToHeatmapsTopDown',
-    'ToHeatmapsTopDown_DARK',
-    'ToHeatmapsTopDown_UDP',
-    'TopDownEvalAffine',
-    'AugmentationbyInformantionDropping',
+    'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
+    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
+    'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK',
+    'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine',
+    'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter',
+    'FlipPose', 'PETR_Resize'
 ]
 
 
@@ -72,38 +65,77 @@ class KeyPointFlip(object):
 
     """
 
-    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
+    def __init__(self, flip_permutation, hmsize=None, flip_prob=0.5):
         super(KeyPointFlip, self).__init__()
         assert isinstance(flip_permutation, Sequence)
         self.flip_permutation = flip_permutation
         self.flip_prob = flip_prob
         self.hmsize = hmsize
 
-    def __call__(self, records):
-        image = records['image']
-        kpts_lst = records['joints']
-        mask_lst = records['mask']
-        flip = np.random.random() < self.flip_prob
-        if flip:
-            image = image[:, ::-1]
-            for idx, hmsize in enumerate(self.hmsize):
-                if len(mask_lst) > idx:
-                    mask_lst[idx] = mask_lst[idx][:, ::-1]
+    def _flipjoints(self, records, sizelst):
+        '''
+        records['gt_joints'] is Sequence in higherhrnet
+        '''
+        if not ('gt_joints' in records and len(records['gt_joints']) > 0):
+            return records
+
+        kpts_lst = records['gt_joints']
+        if isinstance(kpts_lst, Sequence):
+            for idx, hmsize in enumerate(sizelst):
                 if kpts_lst[idx].ndim == 3:
                     kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
                 else:
                     kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
                 kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
-                kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
-                kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
-        records['image'] = image
-        records['joints'] = kpts_lst
+        else:
+            hmsize = sizelst[0]
+            if kpts_lst.ndim == 3:
+                kpts_lst = kpts_lst[:, self.flip_permutation]
+            else:
+                kpts_lst = kpts_lst[self.flip_permutation]
+            kpts_lst[..., 0] = hmsize - kpts_lst[..., 0]
+
+        records['gt_joints'] = kpts_lst
+        return records
+
+    def _flipmask(self, records, sizelst):
+        if not 'mask' in records:
+            return records
+
+        mask_lst = records['mask']
+        for idx, hmsize in enumerate(sizelst):
+            if len(mask_lst) > idx:
+                mask_lst[idx] = mask_lst[idx][:, ::-1]
         records['mask'] = mask_lst
         return records
 
+    def _flipbbox(self, records, sizelst):
+        if not 'gt_bbox' in records:
+            return records
+
+        bboxes = records['gt_bbox']
+        hmsize = sizelst[0]
+        bboxes[:, 0::2] = hmsize - bboxes[:, 0::2][:, ::-1]
+        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, hmsize)
+        records['gt_bbox'] = bboxes
+        return records
+
+    def __call__(self, records):
+        flip = np.random.random() < self.flip_prob
+        if flip:
+            image = records['image']
+            image = image[:, ::-1]
+            records['image'] = image
+            if self.hmsize is None:
+                sizelst = [image.shape[1]]
+            else:
+                sizelst = self.hmsize
+            self._flipjoints(records, sizelst)
+            self._flipmask(records, sizelst)
+            self._flipbbox(records, sizelst)
+
+        return records
+
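As a quick check of the flip logic added above (illustrative only, with a made-up 3-point skeleton): flipping re-orders the joints with `flip_permutation` so left/right points swap, then mirrors x against the map width.

import numpy as np

flip_permutation = [0, 2, 1]   # hypothetical skeleton: [nose, left_eye, right_eye]
hmsize = 128

gt_joints = np.array([[64., 10., 1.], [80., 8., 1.], [50., 9., 1.]])
flipped = gt_joints[flip_permutation].copy()
flipped[..., 0] = hmsize - flipped[..., 0]
# -> [[64., 10., 1.], [78., 9., 1.], [48., 8., 1.]]
# the new "left eye" (index 1) is the mirrored old right eye, and vice versa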
 
 @register_keypointop
 class RandomAffine(object):
@@ -115,7 +147,7 @@ class RandomAffine(object):
         max_scale (list[2]): the scale range to apply, transform range is [min, max]
         max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
         hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
-        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
+        trainsize (list[2]): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
         scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
         records(dict): the dict contained the image, mask and coords
 
@@ -128,9 +160,10 @@ class RandomAffine(object):
                  max_degree=30,
                  scale=[0.75, 1.5],
                  max_shift=0.2,
-                 hmsize=[128, 256],
-                 trainsize=512,
-                 scale_type='short'):
+                 hmsize=None,
+                 trainsize=[512, 512],
+                 scale_type='short',
+                 boldervalue=[114, 114, 114]):
         super(RandomAffine, self).__init__()
         self.max_degree = max_degree
         self.min_scale = scale[0]
@@ -139,8 +172,9 @@ class RandomAffine(object):
         self.hmsize = hmsize
         self.trainsize = trainsize
         self.scale_type = scale_type
+        self.boldervalue = boldervalue
 
-    def _get_affine_matrix(self, center, scale, res, rot=0):
+    def _get_affine_matrix_old(self, center, scale, res, rot=0):
         """Generate transformation matrix."""
         """Generate transformation matrix."""
         h = scale
         h = scale
         t = np.zeros((3, 3), dtype=np.float32)
         t = np.zeros((3, 3), dtype=np.float32)
@@ -166,21 +200,94 @@ class RandomAffine(object):
             t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
         return t
 
+    def _get_affine_matrix(self, center, scale, res, rot=0):
+        """Generate transformation matrix."""
+        w, h = scale
+        t = np.zeros((3, 3), dtype=np.float32)
+        t[0, 0] = float(res[0]) / w
+        t[1, 1] = float(res[1]) / h
+        t[0, 2] = res[0] * (-float(center[0]) / w + .5)
+        t[1, 2] = res[1] * (-float(center[1]) / h + .5)
+        t[2, 2] = 1
+        if rot != 0:
+            rot = -rot  # To match direction of rotation from cropping
+            rot_mat = np.zeros((3, 3), dtype=np.float32)
+            rot_rad = rot * np.pi / 180
+            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+            rot_mat[0, :2] = [cs, -sn]
+            rot_mat[1, :2] = [sn, cs]
+            rot_mat[2, 2] = 1
+            # Need to rotate around center
+            t_mat = np.eye(3)
+            t_mat[0, 2] = -res[0] / 2
+            t_mat[1, 2] = -res[1] / 2
+            t_inv = t_mat.copy()
+            t_inv[:2, 2] *= -1
+            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
+        return t
+
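A quick numeric check of the matrix built above (illustrative values, not part of the diff): with rot=0 the transform simply maps the crop centre to the centre of the output resolution.

import numpy as np

# center=(100, 100), crop size (w, h)=(200, 160), output res=(128, 128), rot=0
w, h, res, center = 200.0, 160.0, (128, 128), (100.0, 100.0)
t = np.array([[res[0] / w, 0.0,        res[0] * (-center[0] / w + .5)],
              [0.0,        res[1] / h, res[1] * (-center[1] / h + .5)],
              [0.0,        0.0,        1.0]])
print(t @ np.array([center[0], center[1], 1.0]))   # -> [64. 64.  1.], i.e. res / 2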
+    def _affine_joints_mask(self,
+                            degree,
+                            center,
+                            roi_size,
+                            dsize,
+                            keypoints=None,
+                            heatmap_mask=None,
+                            gt_bbox=None):
+        kpts = None
+        mask = None
+        bbox = None
+        mask_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+                                                  degree)[:2]
+        if heatmap_mask is not None:
+            mask = cv2.warpAffine(heatmap_mask, mask_affine_mat, dsize)
+            mask = ((mask / 255) > 0.5).astype(np.float32)
+        if keypoints is not None:
+            kpts = copy.deepcopy(keypoints)
+            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
+                                                mask_affine_mat)
+            kpts[(kpts[..., 0]) > dsize[0], :] = 0
+            kpts[(kpts[..., 1]) > dsize[1], :] = 0
+            kpts[(kpts[..., 0]) < 0, :] = 0
+            kpts[(kpts[..., 1]) < 0, :] = 0
+        if gt_bbox is not None:
+            temp_bbox = gt_bbox[:, [0, 3, 2, 1]]
+            cat_bbox = np.concatenate((gt_bbox, temp_bbox), axis=-1)
+            gt_bbox_warped = warp_affine_joints(cat_bbox, mask_affine_mat)
+            bbox = np.zeros_like(gt_bbox)
+            bbox[:, 0] = gt_bbox_warped[:, 0::2].min(1).clip(0, dsize[0])
+            bbox[:, 2] = gt_bbox_warped[:, 0::2].max(1).clip(0, dsize[0])
+            bbox[:, 1] = gt_bbox_warped[:, 1::2].min(1).clip(0, dsize[1])
+            bbox[:, 3] = gt_bbox_warped[:, 1::2].max(1).clip(0, dsize[1])
+        return kpts, mask, bbox
+
     def __call__(self, records):
         image = records['image']
-        keypoints = records['joints']
-        heatmap_mask = records['mask']
+        shape = np.array(image.shape[:2][::-1])
+        keypoints = None
+        heatmap_mask = None
+        gt_bbox = None
+        if 'gt_joints' in records:
+            keypoints = records['gt_joints']
+
+        if 'mask' in records:
+            heatmap_mask = records['mask']
+            heatmap_mask *= 255
+
+        if 'gt_bbox' in records:
+            gt_bbox = records['gt_bbox']
 
         degree = (np.random.random() * 2 - 1) * self.max_degree
-        shape = np.array(image.shape[:2][::-1])
         center = center = np.array((np.array(shape) / 2))
 
         aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                           ) + self.min_scale
         if self.scale_type == 'long':
-            scale = max(shape[0], shape[1]) / 1.0
+            scale = np.array([max(shape[0], shape[1]) / 1.0] * 2)
         elif self.scale_type == 'short':
-            scale = min(shape[0], shape[1]) / 1.0
+            scale = np.array([min(shape[0], shape[1]) / 1.0] * 2)
+        elif self.scale_type == 'wh':
+            scale = shape
         else:
             raise ValueError('Unknown scale type: {}'.format(self.scale_type))
         roi_size = aug_scale * scale
@@ -188,44 +295,55 @@ class RandomAffine(object):
         dy = int(0)
         if self.max_shift > 0:
 
-            dx = np.random.randint(-self.max_shift * roi_size,
-                                   self.max_shift * roi_size)
-            dy = np.random.randint(-self.max_shift * roi_size,
-                                   self.max_shift * roi_size)
+            dx = np.random.randint(-self.max_shift * roi_size[0],
+                                   self.max_shift * roi_size[0])
+            dy = np.random.randint(-self.max_shift * roi_size[0],
+                                   self.max_shift * roi_size[1])
 
         center += np.array([dx, dy])
         input_size = 2 * center
+        if self.trainsize != -1:
+            dsize = self.trainsize
+            imgshape = (dsize)
+        else:
+            dsize = scale
+            imgshape = (shape.tolist())
 
-        keypoints[..., :2] *= shape
-        heatmap_mask *= 255
-        kpts_lst = []
-        mask_lst = []
-
-        image_affine_mat = self._get_affine_matrix(
-            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
+        image_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+                                                   degree)[:2]
         image = cv2.warpAffine(
             image,
-            image_affine_mat, (self.trainsize, self.trainsize),
-            flags=cv2.INTER_LINEAR)
+            image_affine_mat,
+            imgshape,
+            flags=cv2.INTER_LINEAR,
+            borderValue=self.boldervalue)
+
+        if self.hmsize is None:
+            kpts, mask, gt_bbox = self._affine_joints_mask(
+                degree, center, roi_size, dsize, keypoints, heatmap_mask,
+                gt_bbox)
+            records['image'] = image
+            if kpts is not None: records['gt_joints'] = kpts
+            if mask is not None: records['mask'] = mask
+            if gt_bbox is not None: records['gt_bbox'] = gt_bbox
+            return records
+
+        kpts_lst = []
+        mask_lst = []
         for hmsize in self.hmsize:
-            kpts = copy.deepcopy(keypoints)
-            mask_affine_mat = self._get_affine_matrix(
-                center, roi_size, (hmsize, hmsize), degree)[:2]
-            if heatmap_mask is not None:
-                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
-                                      (hmsize, hmsize))
-                mask = ((mask / 255) > 0.5).astype(np.float32)
-            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
-                                                mask_affine_mat)
-            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
-            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
-            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
-            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
+            kpts, mask, gt_bbox = self._affine_joints_mask(
+                degree, center, roi_size, [hmsize, hmsize], keypoints,
+                heatmap_mask, gt_bbox)
             kpts_lst.append(kpts)
             mask_lst.append(mask)
         records['image'] = image
-        records['joints'] = kpts_lst
-        records['mask'] = mask_lst
+
+        if 'gt_joints' in records:
+            records['gt_joints'] = kpts_lst
+        if 'mask' in records:
+            records['mask'] = mask_lst
+        if 'gt_bbox' in records:
+            records['gt_bbox'] = gt_bbox
         return records
 
 
@@ -258,9 +376,10 @@ class EvalAffine(object):
         if mask is not None:
             mask = cv2.warpAffine(mask, trans, size_resized)
             records['mask'] = mask
-        if 'joints' in records:
-            del records['joints']
+        if 'gt_joints' in records:
+            del records['gt_joints']
         records['image'] = image_resized
+        records['scale_factor'] = self.size / min(h, w)
         return records
 
 
@@ -310,7 +429,7 @@ class TagGenerate(object):
         self.num_joints = num_joints
 
     def __call__(self, records):
-        kpts_lst = records['joints']
+        kpts_lst = records['gt_joints']
         kpts = kpts_lst[0]
         tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
         inds = np.where(kpts[..., 2] > 0)
@@ -322,7 +441,7 @@ class TagGenerate(object):
         tagmap[p, j, 2] = visible[..., 0]  # x
         tagmap[p, j, 3] = 1
         records['tagmap'] = tagmap
-        del records['joints']
+        del records['gt_joints']
         return records
 
 
@@ -356,7 +475,7 @@ class ToHeatmaps(object):
         self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
 
     def __call__(self, records):
-        kpts_lst = records['joints']
+        kpts_lst = records['gt_joints']
         mask_lst = records['mask']
         for idx, hmsize in enumerate(self.hmsize):
             mask = mask_lst[idx]
@@ -477,7 +596,7 @@ class RandomFlipHalfBodyTransform(object):
 
     def __call__(self, records):
         image = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         c = records['center']
         s = records['scale']
@@ -500,7 +619,7 @@ class RandomFlipHalfBodyTransform(object):
                 joints, joints_vis, image.shape[1], self.flip_pairs)
             c[0] = image.shape[1] - c[0] - 1
         records['image'] = image
-        records['joints'] = joints
+        records['gt_joints'] = joints
         records['joints_vis'] = joints_vis
         records['center'] = c
         records['scale'] = s
@@ -560,7 +679,7 @@ class AugmentationbyInformantionDropping(object):
 
     def __call__(self, records):
         img = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         if np.random.rand() < self.prob_cutout:
             img = self._cutout(img, joints, joints_vis)
@@ -588,7 +707,7 @@ class TopDownAffine(object):
 
     def __call__(self, records):
         image = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         rot = records['rotate'] if "rotate" in records else 0
         if self.use_udp:
@@ -613,8 +732,171 @@ class TopDownAffine(object):
                     joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
 
         records['image'] = image
-        records['joints'] = joints
+        records['gt_joints'] = joints
+
+        return records
+
+
+@register_keypointop
+class SinglePoseAffine(object):
+    """apply affine transform to image and coords
+
+    Args:
+        trainsize (list): [w, h], the standard size used to train
+        use_udp (bool): whether to use Unbiased Data Processing.
+        records(dict): the dict contained the image and coords
+
+    Returns:
+        records (dict): contain the image and coords after tranformed
+
+    """
+
+    def __init__(self,
+                 trainsize,
+                 rotate=[1.0, 30],
+                 scale=[1.0, 0.25],
+                 use_udp=False):
+        self.trainsize = trainsize
+        self.use_udp = use_udp
+        self.rot_prob = rotate[0]
+        self.rot_range = rotate[1]
+        self.scale_prob = scale[0]
+        self.scale_ratio = scale[1]
+
+    def __call__(self, records):
+        image = records['image']
+        if 'joints_2d' in records:
+            joints = records['joints_2d'] if 'joints_2d' in records else None
+            joints_vis = records[
+                'joints_vis'] if 'joints_vis' in records else np.ones(
+                    (len(joints), 1))
+        rot = 0
+        s = 1.
+        if np.random.random() < self.rot_prob:
+            rot = np.clip(np.random.randn() * self.rot_range,
+                          -self.rot_range * 2, self.rot_range * 2)
+        if np.random.random() < self.scale_prob:
+            s = np.clip(np.random.randn() * self.scale_ratio + 1,
+                        1 - self.scale_ratio, 1 + self.scale_ratio)
+
+        if self.use_udp:
+            trans = get_warp_matrix(
+                rot,
+                np.array(records['bbox_center']) * 2.0,
+                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
+                records['bbox_scale'] * 200.0 * s)
+            image = cv2.warpAffine(
+                image,
+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
+                flags=cv2.INTER_LINEAR)
+            if 'joints_2d' in records:
+                joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(),
+                                                    trans)
+        else:
+            trans = get_affine_transform(
+                np.array(records['bbox_center']),
+                records['bbox_scale'] * s * 200, rot, self.trainsize)
+            image = cv2.warpAffine(
+                image,
+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
+                flags=cv2.INTER_LINEAR)
+            if 'joints_2d' in records:
+                for i in range(len(joints)):
+                    if joints_vis[i, 0] > 0.0:
+                        joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
+
+        if 'joints_3d' in records:
+            pose3d = records['joints_3d']
+            if not rot == 0:
+                trans_3djoints = np.eye(3)
+                rot_rad = -rot * np.pi / 180
+                sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+                trans_3djoints[0, :2] = [cs, -sn]
+                trans_3djoints[1, :2] = [sn, cs]
+                pose3d[:, :3] = np.einsum('ij,kj->ki', trans_3djoints,
+                                          pose3d[:, :3])
+                records['joints_3d'] = pose3d
+
+        records['image'] = image
+        if 'joints_2d' in records:
+            records['joints_2d'] = joints
+
+        return records
+
+
+@register_keypointop
+class NoiseJitter(object):
+    """apply NoiseJitter to image
+
+    Args:
+        noise_factor (float): the noise factor ratio used to generate the jitter
+
+    Returns:
+        records (dict): contain the image and coords after tranformed
+
+    """
 
 
+    def __init__(self, noise_factor=0.4):
+        self.noise_factor = noise_factor
+
+    def __call__(self, records):
+        self.pn = np.random.uniform(1 - self.noise_factor,
+                                    1 + self.noise_factor, 3)
+        rgb_img = records['image']
+        rgb_img[:, :, 0] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 0] * self.pn[0]))
+        rgb_img[:, :, 1] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 1] * self.pn[1]))
+        rgb_img[:, :, 2] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 2] * self.pn[2]))
+        records['image'] = rgb_img
+        return records
+
+
+@register_keypointop
+class FlipPose(object):
+    """random apply flip to image
+
+    Args:
+        noise_factor (float): the noise factor ratio used to generate the jitter
+
+    Returns:
+        records (dict): contain the image and coords after tranformed
+
+    """
+
+    def __init__(self, flip_prob=0.5, img_res=224, num_joints=14):
+        self.flip_pob = flip_prob
+        self.img_res = img_res
+        if num_joints == 24:
+            self.perm = [
+                5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17,
+                18, 19, 21, 20, 23, 22
+            ]
+        elif num_joints == 14:
+            self.perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
+        else:
+            print("error num_joints in flip :{}".format(num_joints))
+
+    def __call__(self, records):
+
+        if np.random.random() < self.flip_pob:
+            img = records['image']
+            img = np.fliplr(img)
+
+            if 'joints_2d' in records:
+                joints_2d = records['joints_2d']
+                joints_2d = joints_2d[self.perm]
+                joints_2d[:, 0] = self.img_res - joints_2d[:, 0]
+                records['joints_2d'] = joints_2d
+
+            if 'joints_3d' in records:
+                joints_3d = records['joints_3d']
+                joints_3d = joints_3d[self.perm]
+                joints_3d[:, 0] = -joints_3d[:, 0]
+                records['joints_3d'] = joints_3d
+
+            records['image'] = img
         return records
 
 
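For the FlipPose op added above, a small sketch of what the 14-joint permutation does (illustrative only; the dummy joints are made-up values): the first and second groups of six joints are each reversed, which swaps the left/right limb keypoints, x is mirrored around img_res for 2D joints, and x is negated for 3D joints.

import numpy as np

perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
img_res = 224

joints_2d = np.arange(14 * 2, dtype=np.float32).reshape(14, 2)   # dummy joints
flipped_2d = joints_2d[perm].copy()
flipped_2d[:, 0] = img_res - flipped_2d[:, 0]   # mirror x, as in FlipPose.__call__
# joint 0 now holds what used to be joint 5, mirrored horizontally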
@@ -686,7 +968,7 @@ class ToHeatmapsTopDown(object):
             https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
             Copyright (c) Microsoft, under the MIT License.
         """
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         num_joints = joints.shape[0]
         image_size = np.array(
@@ -729,7 +1011,7 @@ class ToHeatmapsTopDown(object):
                     0]:g_y[1], g_x[0]:g_x[1]]
         records['target'] = target
         records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']
 
         return records
 
@@ -754,7 +1036,7 @@ class ToHeatmapsTopDown_DARK(object):
         self.sigma = sigma
 
     def __call__(self, records):
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         num_joints = joints.shape[0]
         image_size = np.array(
@@ -787,7 +1069,7 @@ class ToHeatmapsTopDown_DARK(object):
                     (x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
         records['target'] = target
         records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']
 
         return records
 
@@ -816,7 +1098,7 @@ class ToHeatmapsTopDown_UDP(object):
         self.sigma = sigma
 
     def __call__(self, records):
-        joints = records['joints']
+        joints = records['gt_joints']
         joints_vis = records['joints_vis']
         num_joints = joints.shape[0]
         image_size = np.array(
@@ -861,6 +1143,471 @@ class ToHeatmapsTopDown_UDP(object):
                     0]:g_y[1], g_x[0]:g_x[1]]
         records['target'] = target
         records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']
 
         return records
+
+
+from typing import Optional, Tuple, Union, List
+import numbers
+
+
+def _scale_size(
+        size: Tuple[int, int],
+        scale: Union[float, int, tuple], ) -> Tuple[int, int]:
+    """Rescale a size by a ratio.
+
+    Args:
+        size (tuple[int]): (w, h).
+        scale (float | tuple(float)): Scaling factor.
+
+    Returns:
+        tuple[int]: scaled size.
+    """
+    if isinstance(scale, (float, int)):
+        scale = (scale, scale)
+    w, h = size
+    return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
+
+
+def rescale_size(old_size: tuple,
+                 scale: Union[float, int, tuple],
+                 return_scale: bool=False) -> tuple:
+    """Calculate the new size to be rescaled to.
+
+    Args:
+        old_size (tuple[int]): The old size (w, h) of image.
+        scale (float | tuple[int]): The scaling factor or maximum size.
+            If it is a float number, then the image will be rescaled by this
+            factor, else if it is a tuple of 2 integers, then the image will
+            be rescaled as large as possible within the scale.
+        return_scale (bool): Whether to return the scaling factor besides the
+            rescaled image size.
+
+    Returns:
+        tuple[int]: The new rescaled image size.
+    """
+    w, h = old_size
+    if isinstance(scale, (float, int)):
+        if scale <= 0:
+            raise ValueError(f'Invalid scale {scale}, must be positive.')
+        scale_factor = scale
+    elif isinstance(scale, list):
+        max_long_edge = max(scale)
+        max_short_edge = min(scale)
+        scale_factor = min(max_long_edge / max(h, w),
+                           max_short_edge / min(h, w))
+    else:
+        raise TypeError(
+            f'Scale must be a number or tuple of int, but got {type(scale)}')
+
+    new_size = _scale_size((w, h), scale_factor)
+
+    if return_scale:
+        return new_size, scale_factor
+    else:
+        return new_size
+
+
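A worked example of rescale_size above (the numbers are illustrative): a 1920x1080 frame constrained to the scale [1333, 800] keeps its aspect ratio and is bounded by whichever edge hits its limit first.

# scale_factor = min(1333 / 1920, 800 / 1080) = min(0.694..., 0.740...) ~= 0.694
# new_size     = (int(1920 * 0.694 + 0.5), int(1080 * 0.694 + 0.5)) = (1333, 750)
new_size, factor = rescale_size((1920, 1080), [1333, 800], return_scale=True)
# -> new_size == (1333, 750), factor ~= 0.6943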
+def imrescale(img: np.ndarray,
+              scale: Union[float, Tuple[int, int]],
+              return_scale: bool=False,
+              interpolation: str='bilinear',
+              backend: Optional[str]=None) -> Union[np.ndarray, Tuple[
+                  np.ndarray, float]]:
+    """Resize image while keeping the aspect ratio.
+
+    Args:
+        img (ndarray): The input image.
+        scale (float | tuple[int]): The scaling factor or maximum size.
+            If it is a float number, then the image will be rescaled by this
+            factor, else if it is a tuple of 2 integers, then the image will
+            be rescaled as large as possible within the scale.
+        return_scale (bool): Whether to return the scaling factor besides the
+            rescaled image.
+        interpolation (str): Same as :func:`resize`.
+        backend (str | None): Same as :func:`resize`.
+
+    Returns:
+        ndarray: The rescaled image.
+    """
+    h, w = img.shape[:2]
+    new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
+    rescaled_img = imresize(
+        img, new_size, interpolation=interpolation, backend=backend)
+    if return_scale:
+        return rescaled_img, scale_factor
+    else:
+        return rescaled_img
+
+
+def imresize(
+        img: np.ndarray,
+        size: Tuple[int, int],
+        return_scale: bool=False,
+        interpolation: str='bilinear',
+        out: Optional[np.ndarray]=None,
+        backend: Optional[str]=None,
+        interp=cv2.INTER_LINEAR, ) -> Union[Tuple[np.ndarray, float, float],
+                                            np.ndarray]:
+    """Resize image to a given size.
+
+    Args:
+        img (ndarray): The input image.
+        size (tuple[int]): Target size (w, h).
+        return_scale (bool): Whether to return `w_scale` and `h_scale`.
+        interpolation (str): Interpolation method, accepted values are
+            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+            backend, "nearest", "bilinear" for 'pillow' backend.
+        out (ndarray): The output destination.
+        backend (str | None): The image resize backend type. Options are `cv2`,
+            `pillow`, `None`. If backend is None, the global imread_backend
+            specified by ``mmcv.use_backend()`` will be used. Default: None.
+
+    Returns:
+        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
+        `resized_img`.
+    """
+    h, w = img.shape[:2]
+    if backend is None:
+        backend = imread_backend
+    if backend not in ['cv2', 'pillow']:
+        raise ValueError(f'backend: {backend} is not supported for resize.'
+                         f"Supported backends are 'cv2', 'pillow'")
+
+    if backend == 'pillow':
+        assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
+        pil_image = Image.fromarray(img)
+        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
+        resized_img = np.array(pil_image)
+    else:
+        resized_img = cv2.resize(img, size, dst=out, interpolation=interp)
+    if not return_scale:
+        return resized_img
+    else:
+        w_scale = size[0] / w
+        h_scale = size[1] / h
+        return resized_img, w_scale, h_scale
+
+
+class PETR_Resize:
+    """Resize images & bbox & mask.
+
+    This transform resizes the input image to some scale. Bboxes and masks are
+    then resized with the same scale factor. If the input dict contains the key
+    "scale", then the scale in the input dict is used, otherwise the specified
+    scale in the init method is used. If the input dict contains the key
+    "scale_factor" (if MultiScaleFlipAug does not give img_scale but
+    scale_factor), the actual scale will be computed by image shape and
+    scale_factor.
+
+    `img_scale` can either be a tuple (single-scale) or a list of tuple
+    (multi-scale). There are 3 multiscale modes:
+
+    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
+      range and multiply it with the image scale.
+    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
+      sample a scale from the multiscale range.
+    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
+      sample a scale from multiple scales.
+
+    Args:
+        img_scale (tuple or list[tuple]): Images scales for resizing.
+        multiscale_mode (str): Either "range" or "value".
+        ratio_range (tuple[float]): (min_ratio, max_ratio)
+        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+            image.
+        bbox_clip_border (bool, optional): Whether to clip the objects outside
+            the border of the image. In some dataset like MOT17, the gt bboxes
+            are allowed to cross the border of images. Therefore, we don't
+            need to clip the gt bboxes in these cases. Defaults to True.
+        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
+            These two backends generates slightly different results. Defaults
+            to 'cv2'.
+        interpolation (str): Interpolation method, accepted values are
+            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+            backend, "nearest", "bilinear" for 'pillow' backend.
+        override (bool, optional): Whether to override `scale` and
+            `scale_factor` so as to call resize twice. Default False. If True,
+            after the first resizing, the existed `scale` and `scale_factor`
+            will be ignored so the second resizing can be allowed.
+            This option is a work-around for multiple times of resize in DETR.
+            Defaults to False.
+    """
+
+    def __init__(self,
+                 img_scale=None,
+                 multiscale_mode='range',
+                 ratio_range=None,
+                 keep_ratio=True,
+                 bbox_clip_border=True,
+                 backend='cv2',
+                 interpolation='bilinear',
+                 override=False,
+                 keypoint_clip_border=True):
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+            assert isinstance(self.img_scale, list)
+
+        if ratio_range is not None:
+            # mode 1: given a scale and a range of image ratio
+            assert len(self.img_scale) == 1
+        else:
+            # mode 2: given multiple scales or a range of scales
+            assert multiscale_mode in ['value', 'range']
+
+        self.backend = backend
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+        # TODO: refactor the override option in Resize
+        self.interpolation = interpolation
+        self.override = override
+        self.bbox_clip_border = bbox_clip_border
+        self.keypoint_clip_border = keypoint_clip_border
+
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from given candidates.
+
+        Args:
+            img_scales (list[tuple]): Images scales for selection.
+
+        Returns:
+            (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, \
+                where ``img_scale`` is the selected image scale and \
+                ``scale_idx`` is the selected index in the given candidates.
+        """
+
+        assert isinstance(img_scales, list)
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+
+    @staticmethod
+    def random_sample(img_scales):
+        """Randomly sample an img_scale when ``multiscale_mode=='range'``.
+
+        Args:
+            img_scales (list[tuple]): Images scale range for sampling.
+                There must be two tuples in img_scales, which specify the lower
+                and upper bound of image scales.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
+                ``img_scale`` is sampled scale and None is just a placeholder \
+                to be consistent with :func:`random_select`.
+        """
+
+        assert isinstance(img_scales, list) and len(img_scales) == 2
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(
+            min(img_scale_long), max(img_scale_long) + 1)
+        short_edge = np.random.randint(
+            min(img_scale_short), max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """Randomly sample an img_scale when ``ratio_range`` is specified.
+
+        A ratio will be randomly sampled from the range specified by
+        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
+        generate sampled scale.
+
+        Args:
+            img_scale (list): Images scale base to multiply with ratio.
+            ratio_range (tuple[float]): The minimum and maximum ratio to scale
+                the ``img_scale``.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(scale, None)``, where \
+                ``scale`` is sampled ratio multiplied with ``img_scale`` and \
+                None is just a placeholder to be consistent with \
+                :func:`random_select`.
+        """
+
+        assert isinstance(img_scale, list) and len(img_scale) == 2
+        min_ratio, max_ratio = ratio_range
+        assert min_ratio <= max_ratio
+        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+        return scale, None
+
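To make the three multiscale modes from the class docstring concrete (illustrative configurations only, picked up by `_random_scale` below):

# 1) ratio_range: PETR_Resize(img_scale=[[1333, 800]], ratio_range=(0.8, 1.0))
#    -> scale = (int(1333 * r), int(800 * r)) for one random r in [0.8, 1.0)
# 2) multiscale_mode='range' with img_scale=[[1333, 640], [1333, 800]]
#    -> long edge drawn from [1333, 1333], short edge drawn from [640, 800]
# 3) multiscale_mode='value' with img_scale=[[1333, 640], [1333, 800]]
#    -> one of the two listed scales is picked uniformly at random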
+    def _random_scale(self, results):
+        """Randomly sample an img_scale according to ``ratio_range`` and
+        ``multiscale_mode``.
+
+        If ``ratio_range`` is specified, a ratio will be sampled and be
+        multiplied with ``img_scale``.
+        If multiple scales are specified by ``img_scale``, a scale will be
+        sampled according to ``multiscale_mode``.
+        Otherwise, single scale will be used.
+
+        Args:
+            results (dict): Result dict from :obj:`dataset`.
+
+        Returns:
+            dict: Two new keys 'scale` and 'scale_idx` are added into \
+                ``results``, which would be used by subsequent pipelines.
+        """
+
+        if self.ratio_range is not None:
+            scale, scale_idx = self.random_sample_ratio(self.img_scale[0],
+                                                        self.ratio_range)
+        elif len(self.img_scale) == 1:
+            scale, scale_idx = self.img_scale[0], 0
+        elif self.multiscale_mode == 'range':
+            scale, scale_idx = self.random_sample(self.img_scale)
+        elif self.multiscale_mode == 'value':
+            scale, scale_idx = self.random_select(self.img_scale)
+        else:
+            raise NotImplementedError
+        results['scale'] = scale
+        results['scale_idx'] = scale_idx
+
+    def _resize_img(self, results):
+        """Resize images with ``results['scale']``."""
+        for key in ['image'] if 'image' in results else []:
+            if self.keep_ratio:
+                img, scale_factor = imrescale(
+                    results[key],
+                    results['scale'],
+                    return_scale=True,
+                    interpolation=self.interpolation,
+                    backend=self.backend)
+                # the w_scale and h_scale has minor difference
+                # a real fix should be done in the imrescale in the future
+                new_h, new_w = img.shape[:2]
+                h, w = results[key].shape[:2]
+                w_scale = new_w / w
+                h_scale = new_h / h
+            else:
+                img, w_scale, h_scale = imresize(
+                    results[key],
+                    results['scale'],
+                    return_scale=True,
+                    interpolation=self.interpolation,
+                    backend=self.backend)
+
+            scale_factor = np.array(
+                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
+            results['im_shape'] = np.array(img.shape)
+            # in case that there is no padding
+            results['pad_shape'] = img.shape
+            results['scale_factor'] = scale_factor
+            results['keep_ratio'] = self.keep_ratio
+            # img_pad = self.impad(img, shape=results['scale'])
+            results[key] = img
+
+    def _resize_bboxes(self, results):
+        """Resize bounding boxes with ``results['scale_factor']``."""
+        for key in ['gt_bbox'] if 'gt_bbox' in results else []:
+            bboxes = results[key] * results['scale_factor']
+            if self.bbox_clip_border:
+                img_shape = results['im_shape']
+                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
+                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
+            results[key] = bboxes
+
+    def _resize_masks(self, results):
+        """Resize masks with ``results['scale']``"""
+        for key in ['mask'] if 'mask' in results else []:
+            if results[key] is None:
+                continue
+            if self.keep_ratio:
+                results[key] = results[key].rescale(results['scale'])
+            else:
+                results[key] = results[key].resize(results['im_shape'][:2])
+
+    def _resize_seg(self, results):
+        """Resize semantic segmentation map with ``results['scale']``."""
+        for key in ['seg'] if 'seg' in results else []:
+            if self.keep_ratio:
+                gt_seg = imrescale(
+                    results[key],
+                    results['scale'],
+                    interpolation='nearest',
+                    backend=self.backend)
+            else:
+                gt_seg = imresize(
+                    results[key],
+                    results['scale'],
+                    interpolation='nearest',
+                    backend=self.backend)
+            results[key] = gt_seg
+
+    def _resize_keypoints(self, results):
+        """Resize keypoints with ``results['scale_factor']``."""
+        for key in ['gt_joints'] if 'gt_joints' in results else []:
+            keypoints = results[key].copy()
+            keypoints[..., 0] = keypoints[..., 0] * results['scale_factor'][0]
+            keypoints[..., 1] = keypoints[..., 1] * results['scale_factor'][1]
+            if self.keypoint_clip_border:
+                img_shape = results['im_shape']
+                keypoints[..., 0] = np.clip(keypoints[..., 0], 0, img_shape[1])
+                keypoints[..., 1] = np.clip(keypoints[..., 1], 0, img_shape[0])
+            results[key] = keypoints
+
+    def _resize_areas(self, results):
+        """Resize mask areas with ``results['scale_factor']``."""
+        for key in ['gt_areas'] if 'gt_areas' in results else []:
+            areas = results[key].copy()
+            areas = areas * results['scale_factor'][0] * results[
+                'scale_factor'][1]
+            results[key] = areas
+
+    def __call__(self, results):
+        """Call function to resize images, bounding boxes, masks, semantic
+        segmentation map.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
+                'keep_ratio' keys are added into result dict.
+        """
+        if 'scale' not in results:
+            if 'scale_factor' in results:
+                img_shape = results['image'].shape[:2]
+                scale_factor = results['scale_factor'][0]
+                # assert isinstance(scale_factor, float)
+                results['scale'] = [int(x * scale_factor)
+                                    for x in img_shape][::-1]
+            else:
+                self._random_scale(results)
+        else:
+            if not self.override:
+                assert 'scale_factor' not in results, (
+                    'scale and scale_factor cannot be both set.')
+            else:
+                results.pop('scale')
+                if 'scale_factor' in results:
+                    results.pop('scale_factor')
+                self._random_scale(results)
+
+        self._resize_img(results)
+        self._resize_bboxes(results)
+        self._resize_masks(results)
+        self._resize_seg(results)
+        self._resize_keypoints(results)
+        self._resize_areas(results)
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(img_scale={self.img_scale}, '
+        repr_str += f'multiscale_mode={self.multiscale_mode}, '
+        repr_str += f'ratio_range={self.ratio_range}, '
+        repr_str += f'keep_ratio={self.keep_ratio}, '
+        repr_str += f'bbox_clip_border={self.bbox_clip_border})'
+        repr_str += f'keypoint_clip_border={self.keypoint_clip_border})'
+        return repr_str
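A usage sketch for the PETR_Resize operator above (illustrative; the sample keys follow the conventions used elsewhere in this file and the shapes are made-up):

import numpy as np

resize = PETR_Resize(img_scale=[[800, 1333]], keep_ratio=True)
sample = {
    'image': np.zeros((1080, 1920, 3), dtype=np.uint8),
    'gt_bbox': np.array([[100., 200., 400., 600.]], dtype=np.float32),
}
sample = resize(sample)
# keep_ratio resizing by ~0.694: image -> (750, 1333, 3), gt_bbox scaled by the
# same factor, and 'im_shape', 'pad_shape', 'scale_factor', 'keep_ratio' are added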

+ 296 - 0
paddlers/models/ppdet/data/transform/keypoints_3d_operators.py

@@ -0,0 +1,296 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+import cv2
+import numpy as np
+import math
+import copy
+import random
+import uuid
+from numbers import Number, Integral
+
+from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
+from paddlers.models.ppdet.core.workspace import serializable
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+registered_ops = []
+
+__all__ = [
+    'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
+]
+
+import matplotlib.pyplot as plt
+from PIL import Image, ImageDraw
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def register_keypointop(cls):
+    return serializable(cls)
+
+
+def register_op(cls):
+    registered_ops.append(cls.__name__)
+    if not hasattr(BaseOperator, cls.__name__):
+        setattr(BaseOperator, cls.__name__, cls)
+    else:
+        raise KeyError("The {} class has been registered.".format(cls.__name__))
+    return serializable(cls)
+
+
+class BaseOperator(object):
+    def __init__(self, name=None):
+        if name is None:
+            name = self.__class__.__name__
+        self._id = name + '_' + str(uuid.uuid4())[-6:]
+
+    def apply(self, sample, context=None):
+        """ Process a sample.
+        Args:
+            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
+            context (dict): info about this sample processing
+        Returns:
+            result (dict): a processed sample
+        """
+        return sample
+
+    def __call__(self, sample, context=None):
+        """ Process a sample.
+        Args:
+            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
+            context (dict): info about this sample processing
+        Returns:
+            result (dict): a processed sample
+        """
+        if isinstance(sample, Sequence):  # for batch_size
+            for i in range(len(sample)):
+                sample[i] = self.apply(sample[i], context)
+        else:
+            # image.shape changed
+            sample = self.apply(sample, context)
+        return sample
+
+    def __str__(self):
+        return str(self._id)
+
+
+@register_keypointop
+class CropAndFlipImages(object):
+    """Crop all images"""
+
+    def __init__(self, crop_range, flip_pairs=None):
+        super(CropAndFlipImages, self).__init__()
+        self.crop_range = crop_range
+        self.flip_pairs = flip_pairs
+
+    def __call__(self, records):  # tuple
+        images = records["image"]
+        images = images[:, :, ::-1, :]
+        images = images[:, :, self.crop_range[0]:self.crop_range[1]]
+        records["image"] = images
+
+        if "kps2d" in records.keys():
+            kps2d = records["kps2d"]
+
+            width, height = images.shape[2], images.shape[1]
+            kps2d = np.array(kps2d)
+            kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]
+
+            for pair in self.flip_pairs:
+                kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
+                    kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
+
+            records["kps2d"] = kps2d
+
+        return records
+
+
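A shape-level sketch of CropAndFlipImages above (the frame count, crop range, and flip pairs are illustrative):

import numpy as np

op = CropAndFlipImages(crop_range=[420, 1500], flip_pairs=[[1, 2]])
records = {
    'image': np.zeros((6, 1080, 1920, 3), dtype=np.uint8),   # (frames, H, W, C)
    'kps2d': np.zeros((6, 24, 2), dtype=np.float32),
}
records = op(records)
# the width axis is mirrored and then cropped: (6, 1080, 1920, 3) -> (6, 1080, 1080, 3)
# kps2d x coordinates are shifted by -420 and each flip pair of joints is swapped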
+@register_op
+class PermuteImages(BaseOperator):
+    def __init__(self):
+        """
+        Change the channel to be (batch_size, C, H, W) #(6, 3, 1080, 1920)
+        """
+        super(PermuteImages, self).__init__()
+
+    def apply(self, sample, context=None):
+        images = sample["image"]
+        images = images.transpose((0, 3, 1, 2))
+
+        sample["image"] = images
+
+        return sample
+
+
+@register_keypointop
+class RandomFlipHalfBody3DTransformImages(object):
+    """apply data augment to images and coords
+    to achieve the flip, scale, rotate and half body transform effect for training image
+    Args:
+        trainsize (list):[w, h], Image target size
+        upper_body_ids (list): The upper body joint ids
+        flip_pairs (list): The left-right joints exchange order list
+        pixel_std (int): The pixel std of the scale
+        scale (float): The scale factor to transform the image
+        rot (int): The rotate factor to transform the image
+        num_joints_half_body (int): The joints threshold of the half body transform
+        prob_half_body (float): The threshold of the half body transform
+        flip (bool): Whether to flip the image
+    Returns:
+        records(dict): contain the image and coords after tranformed
+    """
+
+    def __init__(self,
+                 trainsize,
+                 upper_body_ids,
+                 flip_pairs,
+                 pixel_std,
+                 scale=0.35,
+                 rot=40,
+                 num_joints_half_body=8,
+                 prob_half_body=0.3,
+                 flip=True,
+                 rot_prob=0.6,
+                 do_occlusion=False):
+        super(RandomFlipHalfBody3DTransformImages, self).__init__()
+        self.trainsize = trainsize
+        self.upper_body_ids = upper_body_ids
+        self.flip_pairs = flip_pairs
+        self.pixel_std = pixel_std
+        self.scale = scale
+        self.rot = rot
+        self.num_joints_half_body = num_joints_half_body
+        self.prob_half_body = prob_half_body
+        self.flip = flip
+        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
+        self.rot_prob = rot_prob
+        self.do_occlusion = do_occlusion
+
+    def halfbody_transform(self, joints, joints_vis):
+        upper_joints = []
+        lower_joints = []
+        for joint_id in range(joints.shape[0]):
+            if joints_vis[joint_id][0] > 0:
+                if joint_id in self.upper_body_ids:
+                    upper_joints.append(joints[joint_id])
+                else:
+                    lower_joints.append(joints[joint_id])
+        if np.random.randn() < 0.5 and len(upper_joints) > 2:
+            selected_joints = upper_joints
+        else:
+            selected_joints = lower_joints if len(
+                lower_joints) > 2 else upper_joints
+        if len(selected_joints) < 2:
+            return None, None
+        selected_joints = np.array(selected_joints, dtype=np.float32)
+        center = selected_joints.mean(axis=0)[:2]
+        left_top = np.amin(selected_joints, axis=0)
+        right_bottom = np.amax(selected_joints, axis=0)
+        w = right_bottom[0] - left_top[0]
+        h = right_bottom[1] - left_top[1]
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array(
+            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
+            dtype=np.float32)
+        scale = scale * 1.5
+
+        return center, scale
+
+    def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
+        # joints: (6, 24, 3),(num_frames, num_joints, 3)
+
+        joints[:, :, 0] = width - joints[:, :, 0] - 1  # x
+        if kps2d is not None:
+            kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1
+
+        for pair in matched_parts:
+            joints[:, pair[0], :], joints[:,pair[1], :] = \
+                joints[:,pair[1], :], joints[:,pair[0], :].copy()
+
+            joints_vis[:,pair[0], :], joints_vis[:,pair[1], :] = \
+                joints_vis[:,pair[1], :], joints_vis[:,pair[0], :].copy()
+
+            if kps2d is not None:
+                kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
+                    kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
+
+        # move to zero
+        joints -= joints[:, [0], :]  # (batch_size, 24, 3),numpy.ndarray
+
+        return joints, joints_vis, kps2d
+
+    def __call__(self, records):
+        images = records[
+            'image']  # records holds kps3d, kps3d_vis and image; image shape is (num_frames, height, width, 3)
+
+        joints = records['kps3d']
+        joints_vis = records['kps3d_vis']
+
+        kps2d = None
+        if 'kps2d' in records.keys():
+            kps2d = records['kps2d']
+
+        if self.flip and np.random.random() <= 0.5:
+            images = images[:, :, ::-1, :]  # horizontally flip the images, e.g. (6, 1080, 810, 3)
+            joints, joints_vis, kps2d = self.flip_joints(
+                joints, joints_vis, images.shape[2], self.flip_pairs,
+                kps2d)  # mirror the keypoints left/right
+        occlusion = False
+        if self.do_occlusion and random.random() <= 0.5:  # random occlusion
+            height = images[0].shape[0]
+            width = images[0].shape[1]
+            occlusion = True
+            while True:
+                area_min = 0.0
+                area_max = 0.2
+                synth_area = (random.random() *
+                              (area_max - area_min) + area_min) * width * height
+
+                ratio_min = 0.3
+                ratio_max = 1 / 0.3
+                synth_ratio = (random.random() *
+                               (ratio_max - ratio_min) + ratio_min)
+
+                synth_h = math.sqrt(synth_area * synth_ratio)
+                synth_w = math.sqrt(synth_area / synth_ratio)
+                synth_xmin = random.random() * (width - synth_w - 1)
+                synth_ymin = random.random() * (height - synth_h - 1)
+
+                if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
+                    xmin = int(synth_xmin)
+                    ymin = int(synth_ymin)
+                    w = int(synth_w)
+                    h = int(synth_h)
+
+                    mask = np.random.rand(h, w, 3) * 255
+                    images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
+                        None, :, :, :]
+                    break
+
+        records['image'] = images
+        records['kps3d'] = joints
+        records['kps3d_vis'] = joints_vis
+        if kps2d is not None:
+            records['kps2d'] = kps2d
+
+        return records
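
For context (this sketch is not part of the patch): a minimal example of how the keypoint operators above compose on a dummy records dict. The clip shape, crop range, and joint count are invented, and PermuteImages.apply is called directly rather than assuming anything about BaseOperator.__call__.

import numpy as np

# Illustrative sample following the key names used by the operators above.
records = {
    "image": np.zeros((6, 1080, 1920, 3), dtype=np.float32),  # (frames, H, W, C)
    "kps3d": np.zeros((6, 24, 3), dtype=np.float32),
    "kps3d_vis": np.ones((6, 24, 3), dtype=np.float32),
}

# Horizontally flip, then keep the width range [420, 1500).
records = CropAndFlipImages(crop_range=[420, 1500])(records)  # image -> (6, 1080, 1080, 3)

# Channels-first layout for the network.
records = PermuteImages().apply(records)                      # image -> (6, 3, 1080, 1080)
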

File diff is too large to display
+ 500 - 71
paddlers/models/ppdet/data/transform/operators.py


+ 7 - 0
paddlers/models/ppdet/engine/__init__.py

@@ -15,6 +15,9 @@
 from . import trainer
 from . import trainer
 from .trainer import *
 from .trainer import *
 
 
+from . import trainer_cot
+from .trainer_cot import *
+
 from . import callbacks
 from . import callbacks
 from .callbacks import *
 from .callbacks import *
 
 
@@ -28,3 +31,7 @@ __all__ = trainer.__all__ \
 from . import tracker
 from . import tracker
 from .tracker import *
 from .tracker import *
 __all__ = __all__ + tracker.__all__
 __all__ = __all__ + tracker.__all__
+
+from . import trainer_ssod
+from .trainer_ssod import *
+__all__ = __all__ + trainer_ssod.__all__

+ 111 - 47
paddlers/models/ppdet/engine/callbacks.py

@@ -152,15 +152,14 @@ class LogPrinter(Callback):
             if mode == 'eval':
             if mode == 'eval':
                 sample_num = status['sample_num']
                 sample_num = status['sample_num']
                 cost_time = status['cost_time']
                 cost_time = status['cost_time']
-                logger.info('Total sample number: {}, averge FPS: {}'.format(
+                logger.info('Total sample number: {}, average FPS: {}'.format(
                     sample_num, sample_num / cost_time))
                     sample_num, sample_num / cost_time))
 
 
 
 
 class Checkpointer(Callback):
 class Checkpointer(Callback):
     def __init__(self, model):
     def __init__(self, model):
         super(Checkpointer, self).__init__(model)
         super(Checkpointer, self).__init__(model)
-        cfg = self.model.cfg
-        self.best_ap = 0.
+        self.best_ap = -1000.
         self.save_dir = os.path.join(self.model.cfg.save_dir,
         self.save_dir = os.path.join(self.model.cfg.save_dir,
                                      self.model.cfg.filename)
                                      self.model.cfg.filename)
         if hasattr(self.model.model, 'student_model'):
         if hasattr(self.model.model, 'student_model'):
@@ -187,7 +186,11 @@ class Checkpointer(Callback):
                 if 'save_best_model' in status and status['save_best_model']:
                 if 'save_best_model' in status and status['save_best_model']:
                     for metric in self.model._metrics:
                     for metric in self.model._metrics:
                         map_res = metric.get_results()
                         map_res = metric.get_results()
-                        if 'bbox' in map_res:
+                        eval_func = "ap"
+                        if 'pose3d' in map_res:
+                            key = 'pose3d'
+                            eval_func = "mpjpe"
+                        elif 'bbox' in map_res:
                             key = 'bbox'
                             key = 'bbox'
                         elif 'keypoint' in map_res:
                         elif 'keypoint' in map_res:
                             key = 'keypoint'
                             key = 'keypoint'
@@ -202,18 +205,36 @@ class Checkpointer(Callback):
                             self.best_ap = map_res[key][0]
                             self.best_ap = map_res[key][0]
                             save_name = 'best_model'
                             save_name = 'best_model'
                             weight = self.weight.state_dict()
                             weight = self.weight.state_dict()
-                        logger.info("Best test {} ap is {:0.3f}.".format(
-                            key, self.best_ap))
+                        logger.info("Best test {} {} is {:0.3f}.".format(
+                            key, eval_func, abs(self.best_ap)))
             if weight:
             if weight:
                 if self.model.use_ema:
                 if self.model.use_ema:
-                    # save model and ema_model
-                    save_model(
-                        status['weight'],
-                        self.model.optimizer,
-                        self.save_dir,
-                        save_name,
-                        epoch_id + 1,
-                        ema_model=weight)
+                    exchange_save_model = status.get('exchange_save_model',
+                                                     False)
+                    if not exchange_save_model:
+                        # save model and ema_model
+                        save_model(
+                            status['weight'],
+                            self.model.optimizer,
+                            self.save_dir,
+                            save_name,
+                            epoch_id + 1,
+                            ema_model=weight)
+                    else:
+                        # save model(student model) and ema_model(teacher model)
+                        # in DenseTeacher SSOD, the teacher model will be higher,
+                        # so exchange when saving pdparams
+                        student_model = status['weight']  # model
+                        teacher_model = weight  # ema_model
+                        save_model(
+                            teacher_model,
+                            self.model.optimizer,
+                            self.save_dir,
+                            save_name,
+                            epoch_id + 1,
+                            ema_model=student_model)
+                        del teacher_model
+                        del student_model
                 else:
                 else:
                     save_model(weight, self.model.optimizer, self.save_dir,
                     save_model(weight, self.model.optimizer, self.save_dir,
                                save_name, epoch_id + 1)
                                save_name, epoch_id + 1)
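
A hedged reading of the Checkpointer change above: bbox/keypoint AP is larger-is-better, while the new pose3d entry reports MPJPE, which is smaller-is-better. Initializing best_ap to -1000. and printing abs() only fits if the value stored for pose3d is sign-flipped, so the usual greater-than comparison (which lies outside the hunk shown) keeps working. A toy illustration under that assumption:

# Toy illustration only; not the Checkpointer code. Assumes map_res['pose3d'][0]
# already carries a negated MPJPE so "bigger is better" holds for every metric.
best = -1000.
for map_res in [{'pose3d': [-62.3]}, {'pose3d': [-58.1]}, {'pose3d': [-60.0]}]:
    key, eval_func = ('pose3d', 'mpjpe') if 'pose3d' in map_res else ('bbox', 'ap')
    if map_res[key][0] > best:            # -58.1 > -62.3: a lower MPJPE wins
        best = map_res[key][0]
print("Best test {} {} is {:0.3f}.".format(key, eval_func, abs(best)))  # 58.100
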
@@ -288,6 +309,7 @@ class VisualDLWriter(Callback):
                                                    self.vdl_mAP_step)
                                                    self.vdl_mAP_step)
                 self.vdl_mAP_step += 1
                 self.vdl_mAP_step += 1
 
 
+
 class WandbCallback(Callback):
 class WandbCallback(Callback):
     def __init__(self, model):
     def __init__(self, model):
         super(WandbCallback, self).__init__(model)
         super(WandbCallback, self).__init__(model)
@@ -307,10 +329,8 @@ class WandbCallback(Callback):
             self.wandb_params = {}
             self.wandb_params = {}
         for k, v in model.cfg.items():
         for k, v in model.cfg.items():
             if k.startswith("wandb_"):
             if k.startswith("wandb_"):
-                self.wandb_params.update({
-                    k.lstrip("wandb_"): v
-                })
-        
+                self.wandb_params.update({k.lstrip("wandb_"): v})
+
         self._run = None
         self._run = None
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
             _ = self.run
             _ = self.run
@@ -318,37 +338,50 @@ class WandbCallback(Callback):
             self.run.define_metric("epoch")
             self.run.define_metric("epoch")
             self.run.define_metric("eval/*", step_metric="epoch")
             self.run.define_metric("eval/*", step_metric="epoch")
 
 
-        self.best_ap = 0
-    
+        self.best_ap = -1000.
+        self.fps = []
+
     @property
     @property
     def run(self):
     def run(self):
         if self._run is None:
         if self._run is None:
             if self.wandb.run is not None:
             if self.wandb.run is not None:
-                logger.info("There is an ongoing wandb run which will be used"
-                        "for logging. Please use `wandb.finish()` to end that"
-                        "if the behaviour is not intended")
+                logger.info(
+                    "There is an ongoing wandb run which will be used"
+                    "for logging. Please use `wandb.finish()` to end that"
+                    "if the behaviour is not intended")
                 self._run = self.wandb.run
                 self._run = self.wandb.run
             else:
             else:
                 self._run = self.wandb.init(**self.wandb_params)
                 self._run = self.wandb.init(**self.wandb_params)
         return self._run
         return self._run
-    
+
     def save_model(self,
     def save_model(self,
-                optimizer,
-                save_dir,
-                save_name,
-                last_epoch,
-                ema_model=None,
-                ap=None, 
-                tags=None):
+                   optimizer,
+                   save_dir,
+                   save_name,
+                   last_epoch,
+                   ema_model=None,
+                   ap=None,
+                   fps=None,
+                   tags=None):
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
             model_path = os.path.join(save_dir, save_name)
             model_path = os.path.join(save_dir, save_name)
             metadata = {}
             metadata = {}
             metadata["last_epoch"] = last_epoch
             metadata["last_epoch"] = last_epoch
             if ap:
             if ap:
                 metadata["ap"] = ap
                 metadata["ap"] = ap
+
+            if fps:
+                metadata["fps"] = fps
+
             if ema_model is None:
             if ema_model is None:
-                ema_artifact = self.wandb.Artifact(name="ema_model-{}".format(self.run.id), type="model", metadata=metadata)
-                model_artifact = self.wandb.Artifact(name="model-{}".format(self.run.id), type="model", metadata=metadata)
+                ema_artifact = self.wandb.Artifact(
+                    name="ema_model-{}".format(self.run.id),
+                    type="model",
+                    metadata=metadata)
+                model_artifact = self.wandb.Artifact(
+                    name="model-{}".format(self.run.id),
+                    type="model",
+                    metadata=metadata)
 
 
                 ema_artifact.add_file(model_path + ".pdema", name="model_ema")
                 ema_artifact.add_file(model_path + ".pdema", name="model_ema")
                 model_artifact.add_file(model_path + ".pdparams", name="model")
                 model_artifact.add_file(model_path + ".pdparams", name="model")
@@ -356,10 +389,13 @@ class WandbCallback(Callback):
                 self.run.log_artifact(ema_artifact, aliases=tags)
                 self.run.log_artifact(ema_artifact, aliases=tags)
                 self.run.log_artfact(model_artifact, aliases=tags)
                 self.run.log_artfact(model_artifact, aliases=tags)
             else:
             else:
-                model_artifact = self.wandb.Artifact(name="model-{}".format(self.run.id), type="model", metadata=metadata)
+                model_artifact = self.wandb.Artifact(
+                    name="model-{}".format(self.run.id),
+                    type="model",
+                    metadata=metadata)
                 model_artifact.add_file(model_path + ".pdparams", name="model")
                 model_artifact.add_file(model_path + ".pdparams", name="model")
                 self.run.log_artifact(model_artifact, aliases=tags)
                 self.run.log_artifact(model_artifact, aliases=tags)
-    
+
     def on_step_end(self, status):
     def on_step_end(self, status):
 
 
         mode = status['mode']
         mode = status['mode']
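
For reference, the reformatted save_model above still logs the same wandb artifacts; a hedged sketch of the non-EMA branch with made-up paths and metric values (the checkpoint file must already exist for add_file to succeed):

import wandb

run = wandb.init(project="ppdet-example", mode="offline")  # illustrative run
metadata = {"last_epoch": 12, "ap": 0.513, "fps": 38.7}     # invented values
model_artifact = wandb.Artifact(
    name="model-{}".format(run.id), type="model", metadata=metadata)
model_artifact.add_file("output/best_model.pdparams", name="model")  # illustrative path
run.log_artifact(model_artifact, aliases=["latest", "best_model"])
run.finish()
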
@@ -368,22 +404,41 @@ class WandbCallback(Callback):
                 training_status = status['training_staus'].get()
                 training_status = status['training_staus'].get()
                 for k, v in training_status.items():
                 for k, v in training_status.items():
                     training_status[k] = float(v)
                     training_status[k] = float(v)
-                metrics = {
-                    "train/" + k: v for k,v in training_status.items()
-                }
+
+                # calculate ips, data_cost, batch_cost
+                batch_time = status['batch_time']
+                data_time = status['data_time']
+                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
+                ))]['batch_size']
+
+                ips = float(batch_size) / float(batch_time.avg)
+                data_cost = float(data_time.avg)
+                batch_cost = float(batch_time.avg)
+
+                metrics = {"train/" + k: v for k, v in training_status.items()}
+
+                metrics["train/ips"] = ips
+                metrics["train/data_cost"] = data_cost
+                metrics["train/batch_cost"] = batch_cost
+
+                self.fps.append(ips)
                 self.run.log(metrics)
                 self.run.log(metrics)
-    
+
     def on_epoch_end(self, status):
     def on_epoch_end(self, status):
         mode = status['mode']
         mode = status['mode']
         epoch_id = status['epoch_id']
         epoch_id = status['epoch_id']
         save_name = None
         save_name = None
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
         if dist.get_world_size() < 2 or dist.get_rank() == 0:
             if mode == 'train':
             if mode == 'train':
+                fps = sum(self.fps) / len(self.fps)
+                self.fps = []
+
                 end_epoch = self.model.cfg.epoch
                 end_epoch = self.model.cfg.epoch
                 if (
                 if (
                         epoch_id + 1
                         epoch_id + 1
                 ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                 ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
-                    save_name = str(epoch_id) if epoch_id != end_epoch - 1 else "model_final"
+                    save_name = str(
+                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
                     tags = ["latest", "epoch_{}".format(epoch_id)]
                     tags = ["latest", "epoch_{}".format(epoch_id)]
                     self.save_model(
                     self.save_model(
                         self.model.optimizer,
                         self.model.optimizer,
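
To make the new throughput fields concrete, a small worked example of the formulas above (all numbers invented):

# train/ips: images per second from the reader batch size and average step time.
batch_size, avg_batch_time, avg_data_time = 8, 0.25, 0.05
ips = batch_size / avg_batch_time   # 32.0 -> logged as train/ips
data_cost = avg_data_time           # 0.05 -> train/data_cost
batch_cost = avg_batch_time         # 0.25 -> train/batch_cost

# eval/fps: evaluated samples over total evaluation wall time.
sample_num, cost_time = 5000, 125.0
fps = sample_num / cost_time        # 40.0 -> eval/fps
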
@@ -391,20 +446,29 @@ class WandbCallback(Callback):
                         save_name,
                         save_name,
                         epoch_id + 1,
                         epoch_id + 1,
                         self.model.use_ema,
                         self.model.use_ema,
-                        tags=tags
-                    )
+                        fps=fps,
+                        tags=tags)
             if mode == 'eval':
             if mode == 'eval':
+                sample_num = status['sample_num']
+                cost_time = status['cost_time']
+
+                fps = sample_num / cost_time
+
                 merged_dict = {}
                 merged_dict = {}
                 for metric in self.model._metrics:
                 for metric in self.model._metrics:
                     for key, map_value in metric.get_results().items():
                     for key, map_value in metric.get_results().items():
                         merged_dict["eval/{}-mAP".format(key)] = map_value[0]
                         merged_dict["eval/{}-mAP".format(key)] = map_value[0]
                 merged_dict["epoch"] = status["epoch_id"]
                 merged_dict["epoch"] = status["epoch_id"]
+                merged_dict["eval/fps"] = sample_num / cost_time
+
                 self.run.log(merged_dict)
                 self.run.log(merged_dict)
 
 
                 if 'save_best_model' in status and status['save_best_model']:
                 if 'save_best_model' in status and status['save_best_model']:
                     for metric in self.model._metrics:
                     for metric in self.model._metrics:
                         map_res = metric.get_results()
                         map_res = metric.get_results()
-                        if 'bbox' in map_res:
+                        if 'pose3d' in map_res:
+                            key = 'pose3d'
+                        elif 'bbox' in map_res:
                             key = 'bbox'
                             key = 'bbox'
                         elif 'keypoint' in map_res:
                         elif 'keypoint' in map_res:
                             key = 'keypoint'
                             key = 'keypoint'
@@ -426,10 +490,10 @@ class WandbCallback(Callback):
                                 save_name,
                                 save_name,
                                 last_epoch=epoch_id + 1,
                                 last_epoch=epoch_id + 1,
                                 ema_model=self.model.use_ema,
                                 ema_model=self.model.use_ema,
-                                ap=self.best_ap,
-                                tags=tags
-                            )
-    
+                                ap=abs(self.best_ap),
+                                fps=fps,
+                                tags=tags)
+
     def on_train_end(self, status):
     def on_train_end(self, status):
         self.run.finish()
         self.run.finish()
 
 

+ 54 - 6
paddlers/models/ppdet/engine/export_utils.py

@@ -29,6 +29,7 @@ logger = setup_logger('ppdet.engine')
 # Global dictionary
 # Global dictionary
 TRT_MIN_SUBGRAPH = {
 TRT_MIN_SUBGRAPH = {
     'YOLO': 3,
     'YOLO': 3,
+    'PPYOLOE': 3,
     'SSD': 60,
     'SSD': 60,
     'RCNN': 40,
     'RCNN': 40,
     'RetinaNet': 40,
     'RetinaNet': 40,
@@ -42,6 +43,7 @@ TRT_MIN_SUBGRAPH = {
     'HRNet': 3,
     'HRNet': 3,
     'DeepSORT': 3,
     'DeepSORT': 3,
     'ByteTrack': 10,
     'ByteTrack': 10,
+    'CenterTrack': 5,
     'JDE': 10,
     'JDE': 10,
     'FairMOT': 5,
     'FairMOT': 5,
     'GFL': 16,
     'GFL': 16,
@@ -49,10 +51,46 @@ TRT_MIN_SUBGRAPH = {
     'CenterNet': 5,
     'CenterNet': 5,
     'TOOD': 5,
     'TOOD': 5,
     'YOLOX': 8,
     'YOLOX': 8,
+    'YOLOF': 40,
+    'METRO_Body': 3,
+    'DETR': 3,
 }
 }
 
 
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
+MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
+
+TO_STATIC_SPEC = {
+    'yolov3_darknet53_270e_coco': [{
+        'im_id': paddle.static.InputSpec(
+            name='im_id', shape=[-1, 1], dtype='float32'),
+        'is_crowd': paddle.static.InputSpec(
+            name='is_crowd', shape=[-1, 50], dtype='float32'),
+        'gt_bbox': paddle.static.InputSpec(
+            name='gt_bbox', shape=[-1, 50, 4], dtype='float32'),
+        'curr_iter': paddle.static.InputSpec(
+            name='curr_iter', shape=[-1], dtype='float32'),
+        'image': paddle.static.InputSpec(
+            name='image', shape=[-1, 3, -1, -1], dtype='float32'),
+        'im_shape': paddle.static.InputSpec(
+            name='im_shape', shape=[-1, 2], dtype='float32'),
+        'scale_factor': paddle.static.InputSpec(
+            name='scale_factor', shape=[-1, 2], dtype='float32'),
+        'target0': paddle.static.InputSpec(
+            name='target0', shape=[-1, 3, 86, -1, -1], dtype='float32'),
+        'target1': paddle.static.InputSpec(
+            name='target1', shape=[-1, 3, 86, -1, -1], dtype='float32'),
+        'target2': paddle.static.InputSpec(
+            name='target2', shape=[-1, 3, 86, -1, -1], dtype='float32'),
+    }],
+}
+
+
+def apply_to_static(config, model):
+    filename = config.get('filename', None)
+    spec = TO_STATIC_SPEC.get(filename, None)
+    model = paddle.jit.to_static(model, input_spec=spec)
+    logger.info("Successfully to apply @to_static with specs: {}".format(spec))
+    return model
 
 
 
 
 def _prune_input_spec(input_spec, program, targets):
 def _prune_input_spec(input_spec, program, targets):
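
To show how the new hook is meant to be wired in (mirroring the Trainer change later in this commit), here is a hedged sketch; the config values are assumptions, and a filename without an entry in TO_STATIC_SPEC simply falls back to input_spec=None:

import paddle
from paddlers.models.ppdet.engine.export_utils import apply_to_static

# Toy stand-ins; real code passes the loaded ppdet config and detector model.
model = paddle.nn.Linear(4, 2)
config = {'filename': 'yolov3_darknet53_270e_coco', 'to_static': True}

if config.get('to_static', False):
    # Wraps the model with paddle.jit.to_static using the InputSpec list
    # registered for this filename; conversion itself happens lazily at the
    # first forward call.
    model = apply_to_static(config, model)
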
@@ -140,10 +178,11 @@ def _dump_infer_config(config, path, image_shape, model):
         infer_cfg['export_onnx'] = True
         infer_cfg['export_onnx'] = True
         infer_cfg['export_eb'] = export_eb
         infer_cfg['export_eb'] = export_eb
 
 
-
     if infer_arch in MOT_ARCH:
     if infer_arch in MOT_ARCH:
         if infer_arch == 'DeepSORT':
         if infer_arch == 'DeepSORT':
             tracker_cfg = config['DeepSORTTracker']
             tracker_cfg = config['DeepSORTTracker']
+        elif infer_arch == 'CenterTrack':
+            tracker_cfg = config['CenterTracker']
         else:
         else:
             tracker_cfg = config['JDETracker']
             tracker_cfg = config['JDETracker']
         infer_cfg['tracker'] = _parse_tracker(tracker_cfg)
         infer_cfg['tracker'] = _parse_tracker(tracker_cfg)
@@ -155,7 +194,10 @@ def _dump_infer_config(config, path, image_shape, model):
             arch_state = True
             arch_state = True
             break
             break
 
 
-    if infer_arch == 'YOLOX':
+    if infer_arch == 'PPYOLOEWithAuxHead':
+        infer_arch = 'PPYOLOE'
+
+    if infer_arch in ['PPYOLOE', 'YOLOX', 'YOLOF']:
         infer_cfg['arch'] = infer_arch
         infer_cfg['arch'] = infer_arch
         infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
         infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
         arch_state = True
         arch_state = True
@@ -174,9 +216,15 @@ def _dump_infer_config(config, path, image_shape, model):
         label_arch = 'keypoint_arch'
         label_arch = 'keypoint_arch'
 
 
     if infer_arch in MOT_ARCH:
     if infer_arch in MOT_ARCH:
-        label_arch = 'mot_arch'
-        reader_cfg = config['TestMOTReader']
-        dataset_cfg = config['TestMOTDataset']
+        if config['metric'] in ['COCO', 'VOC']:
+            # MOT model run as Detector
+            reader_cfg = config['TestReader']
+            dataset_cfg = config['TestDataset']
+        else:
+            # 'metric' in ['MOT', 'MCMOT', 'KITTI']
+            label_arch = 'mot_arch'
+            reader_cfg = config['TestMOTReader']
+            dataset_cfg = config['TestMOTDataset']
     else:
     else:
         reader_cfg = config['TestReader']
         reader_cfg = config['TestReader']
         dataset_cfg = config['TestDataset']
         dataset_cfg = config['TestDataset']

+ 107 - 10
paddlers/models/ppdet/engine/tracker.py

@@ -29,9 +29,11 @@ from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
 from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
 from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
 from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
-from paddlers.models.ppdet.modeling.mot.tracker import JDETracker, DeepSORTTracker, OCSORTTracker
+from paddlers.models.ppdet.modeling.mot.tracker import JDETracker, CenterTracker
+from paddlers.models.ppdet.modeling.mot.tracker import DeepSORTTracker, OCSORTTracker, BOTSORTTracker
 from paddlers.models.ppdet.modeling.architectures import YOLOX
 from paddlers.models.ppdet.modeling.architectures import YOLOX
 from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric, MCMOTMetric
 from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric, MCMOTMetric
+from paddlers.models.ppdet.data.source.category import get_categories
 import paddlers.models.ppdet.utils.stats as stats
 import paddlers.models.ppdet.utils.stats as stats
 
 
 from .callbacks import Callback, ComposeCallback
 from .callbacks import Callback, ComposeCallback
@@ -39,9 +41,9 @@ from .callbacks import Callback, ComposeCallback
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
 logger = setup_logger(__name__)
 
 
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
-MOT_ARCH_JDE = ['JDE', 'FairMOT']
-MOT_ARCH_SDE = ['DeepSORT', 'ByteTrack']
+MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
+MOT_ARCH_JDE = MOT_ARCH[:2]
+MOT_ARCH_SDE = MOT_ARCH[2:4]
 MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti']
 MOT_DATA_TYPE = ['mot', 'mcmot', 'kitti']
 
 
 __all__ = ['Tracker']
 __all__ = ['Tracker']
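
Spelled out, the new slicing keeps the JDE-style and SDE-style groups unchanged while leaving CenterTrack in neither group; it is handled by the dedicated _eval_seq_centertrack path added below:

MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
MOT_ARCH_JDE = MOT_ARCH[:2]   # ['JDE', 'FairMOT']
MOT_ARCH_SDE = MOT_ARCH[2:4]  # ['DeepSORT', 'ByteTrack'] (CenterTrack excluded)
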
@@ -67,6 +69,13 @@ class Tracker(object):
                     m._epsilon = 1e-3  # for amp(fp16)
                     m._epsilon = 1e-3  # for amp(fp16)
                     m._momentum = 0.97  # 0.03 in pytorch
                     m._momentum = 0.97  # 0.03 in pytorch
 
 
+        anno_file = self.dataset.get_anno()
+        clsid2catid, catid2name = get_categories(
+            self.cfg.metric, anno_file=anno_file)
+        self.ids2names = []
+        for k, v in catid2name.items():
+            self.ids2names.append(v)
+
         self.status = {}
         self.status = {}
         self.start_epoch = 0
         self.start_epoch = 0
 
 
@@ -130,6 +139,53 @@ class Tracker(object):
         else:
         else:
             load_weight(self.model.reid, reid_weights)
             load_weight(self.model.reid, reid_weights)
 
 
+    def _eval_seq_centertrack(self,
+                              dataloader,
+                              save_dir=None,
+                              show_image=False,
+                              frame_rate=30,
+                              draw_threshold=0):
+        assert isinstance(self.model.tracker, CenterTracker)
+        if save_dir:
+            if not os.path.exists(save_dir): os.makedirs(save_dir)
+        tracker = self.model.tracker
+
+        timer = MOTTimer()
+        frame_id = 0
+        self.status['mode'] = 'track'
+        self.model.eval()
+        results = defaultdict(list)  # only support single class now
+
+        for step_id, data in enumerate(tqdm(dataloader)):
+            self.status['step_id'] = step_id
+            if step_id == 0:
+                self.model.reset_tracking()
+
+            # forward
+            timer.tic()
+            pred_ret = self.model(data)
+
+            online_targets = tracker.update(pred_ret)
+            online_tlwhs, online_scores, online_ids = [], [], []
+            for t in online_targets:
+                bbox = t['bbox']
+                tlwh = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
+                tscore = float(t['score'])
+                tid = int(t['tracking_id'])
+                if tlwh[2] * tlwh[3] > 0:
+                    online_tlwhs.append(tlwh)
+                    online_ids.append(tid)
+                    online_scores.append(tscore)
+            timer.toc()
+            # save results
+            results[0].append(
+                (frame_id + 1, online_tlwhs, online_scores, online_ids))
+            save_vis_results(data, frame_id, online_ids, online_tlwhs,
+                             online_scores, timer.average_time, show_image,
+                             save_dir, self.cfg.num_classes, self.ids2names)
+            frame_id += 1
+        return results, frame_id, timer.average_time, timer.calls
+
     def _eval_seq_jde(self,
     def _eval_seq_jde(self,
                       dataloader,
                       dataloader,
                       save_dir=None,
                       save_dir=None,
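
For readers unfamiliar with CenterTrack output, each target consumed by the loop above is a dict carrying an xyxy box, a score, and a tracking id; a tiny illustration of the tlwh conversion (values invented):

t = {'bbox': [100.0, 50.0, 180.0, 210.0], 'score': 0.87, 'tracking_id': 3}
x1, y1, x2, y2 = t['bbox']
tlwh = [x1, y1, x2 - x1, y2 - y1]  # [100.0, 50.0, 80.0, 160.0]
keep = tlwh[2] * tlwh[3] > 0       # zero-area boxes are dropped
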
@@ -180,7 +236,7 @@ class Tracker(object):
             timer.toc()
             timer.toc()
             save_vis_results(data, frame_id, online_ids, online_tlwhs,
             save_vis_results(data, frame_id, online_ids, online_tlwhs,
                              online_scores, timer.average_time, show_image,
                              online_scores, timer.average_time, show_image,
-                             save_dir, self.cfg.num_classes)
+                             save_dir, self.cfg.num_classes, self.ids2names)
             frame_id += 1
             frame_id += 1
 
 
         return results, frame_id, timer.average_time, timer.calls
         return results, frame_id, timer.average_time, timer.calls
@@ -197,7 +253,11 @@ class Tracker(object):
         if save_dir:
         if save_dir:
             if not os.path.exists(save_dir): os.makedirs(save_dir)
             if not os.path.exists(save_dir): os.makedirs(save_dir)
         use_detector = False if not self.model.detector else True
         use_detector = False if not self.model.detector else True
-        use_reid = False if not self.model.reid else True
+        use_reid = hasattr(self.model, 'reid')
+        if use_reid and self.model.reid is not None:
+            use_reid = True
+        else:
+            use_reid = False
 
 
         timer = MOTTimer()
         timer = MOTTimer()
         results = defaultdict(list)
         results = defaultdict(list)
@@ -290,7 +350,7 @@ class Tracker(object):
                 online_ids, online_tlwhs, online_scores = None, None, None
                 online_ids, online_tlwhs, online_scores = None, None, None
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                  online_scores, timer.average_time, show_image,
                                  online_scores, timer.average_time, show_image,
-                                 save_dir, self.cfg.num_classes)
+                                 save_dir, self.cfg.num_classes, self.ids2names)
                 frame_id += 1
                 frame_id += 1
                 # thus will not inference reid model
                 # thus will not inference reid model
                 continue
                 continue
@@ -338,7 +398,7 @@ class Tracker(object):
                     (frame_id + 1, online_tlwhs, online_scores, online_ids))
                     (frame_id + 1, online_tlwhs, online_scores, online_ids))
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                  online_scores, timer.average_time, show_image,
                                  online_scores, timer.average_time, show_image,
-                                 save_dir, self.cfg.num_classes)
+                                 save_dir, self.cfg.num_classes, self.ids2names)
 
 
             elif isinstance(tracker, JDETracker):
             elif isinstance(tracker, JDETracker):
                 # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
                 # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
@@ -369,7 +429,8 @@ class Tracker(object):
                 timer.toc()
                 timer.toc()
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                  online_scores, timer.average_time, show_image,
                                  online_scores, timer.average_time, show_image,
-                                 save_dir, self.cfg.num_classes)
+                                 save_dir, self.cfg.num_classes, self.ids2names)
+
             elif isinstance(tracker, OCSORTTracker):
             elif isinstance(tracker, OCSORTTracker):
                 # OC_SORT Tracker
                 # OC_SORT Tracker
                 online_targets = tracker.update(pred_dets_old, pred_embs)
                 online_targets = tracker.update(pred_dets_old, pred_embs)
@@ -390,7 +451,31 @@ class Tracker(object):
                     (frame_id + 1, online_tlwhs, online_scores, online_ids))
                     (frame_id + 1, online_tlwhs, online_scores, online_ids))
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                 save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                  online_scores, timer.average_time, show_image,
                                  online_scores, timer.average_time, show_image,
-                                 save_dir, self.cfg.num_classes)
+                                 save_dir, self.cfg.num_classes, self.ids2names)
+
+            elif isinstance(tracker, BOTSORTTracker):
+                # BOTSORT Tracker
+                online_targets = tracker.update(
+                    pred_dets_old, img=ori_image.numpy())
+                online_tlwhs = []
+                online_ids = []
+                online_scores = []
+                for t in online_targets:
+                    tlwh = t.tlwh
+                    tid = t.track_id
+                    tscore = t.score
+                    if tlwh[2] * tlwh[3] > 0:
+                        online_tlwhs.append(tlwh)
+                        online_ids.append(tid)
+                        online_scores.append(tscore)
+                timer.toc()
+                # save results
+                results[0].append(
+                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
+                save_vis_results(data, frame_id, online_ids, online_tlwhs,
+                                 online_scores, timer.average_time, show_image,
+                                 save_dir, self.cfg.num_classes, self.ids2names)
+
             else:
             else:
                 raise ValueError(tracker)
                 raise ValueError(tracker)
             frame_id += 1
             frame_id += 1
@@ -461,6 +546,12 @@ class Tracker(object):
                         scaled=scaled,
                         scaled=scaled,
                         det_file=os.path.join(det_results_dir,
                         det_file=os.path.join(det_results_dir,
                                               '{}.txt'.format(seq)))
                                               '{}.txt'.format(seq)))
+                elif model_type == 'CenterTrack':
+                    results, nf, ta, tc = self._eval_seq_centertrack(
+                        dataloader,
+                        save_dir=save_dir,
+                        show_image=show_image,
+                        frame_rate=frame_rate)
                 else:
                 else:
                     raise ValueError(model_type)
                     raise ValueError(model_type)
 
 
@@ -587,6 +678,12 @@ class Tracker(object):
                     det_file=os.path.join(det_results_dir,
                     det_file=os.path.join(det_results_dir,
                                           '{}.txt'.format(seq)),
                                           '{}.txt'.format(seq)),
                     draw_threshold=draw_threshold)
                     draw_threshold=draw_threshold)
+            elif model_type == 'CenterTrack':
+                results, nf, ta, tc = self._eval_seq_centertrack(
+                    dataloader,
+                    save_dir=save_dir,
+                    show_image=show_image,
+                    frame_rate=frame_rate)
             else:
             else:
                 raise ValueError(model_type)
                 raise ValueError(model_type)
 
 

+ 147 - 30
paddlers/models/ppdet/engine/trainer.py

@@ -38,7 +38,7 @@ from paddlers.models.ppdet.optimizer import ModelEMA
 from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.core.workspace import create
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
 from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
-from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
+from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval, Pose3DEval
 from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
 from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
 from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
 from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
 from paddlers.models.ppdet.data.source.category import get_categories
 from paddlers.models.ppdet.data.source.category import get_categories
@@ -48,7 +48,7 @@ from paddlers.models.ppdet.utils import profiler
 from paddlers.models.ppdet.modeling.post_process import multiclass_nms
 from paddlers.models.ppdet.modeling.post_process import multiclass_nms
 
 
 from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator, WandbCallback
 from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator, WandbCallback
-from .export_utils import _dump_infer_config, _prune_input_spec
+from .export_utils import _dump_infer_config, _prune_input_spec, apply_to_static
 
 
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
 from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients
 
 
@@ -57,12 +57,12 @@ logger = setup_logger('ppdet.engine')
 
 
 __all__ = ['Trainer']
 __all__ = ['Trainer']
 
 
-MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT', 'ByteTrack']
+MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
 
 
 
 
 class Trainer(object):
 class Trainer(object):
     def __init__(self, cfg, mode='train'):
     def __init__(self, cfg, mode='train'):
-        self.cfg = cfg
+        self.cfg = cfg.copy()
         assert mode.lower() in ['train', 'eval', 'test'], \
         assert mode.lower() in ['train', 'eval', 'test'], \
                 "mode should be 'train', 'eval' or 'test'"
                 "mode should be 'train', 'eval' or 'test'"
         self.mode = mode.lower()
         self.mode = mode.lower()
@@ -72,10 +72,14 @@ class Trainer(object):
         self.amp_level = self.cfg.get('amp_level', 'O1')
         self.amp_level = self.cfg.get('amp_level', 'O1')
         self.custom_white_list = self.cfg.get('custom_white_list', None)
         self.custom_white_list = self.cfg.get('custom_white_list', None)
         self.custom_black_list = self.cfg.get('custom_black_list', None)
         self.custom_black_list = self.cfg.get('custom_black_list', None)
+        if 'slim' in cfg and cfg['slim_type'] == 'PTQ':
+            self.cfg['TestDataset'] = create('TestDataset')()
 
 
         # build data loader
         # build data loader
         capital_mode = self.mode.capitalize()
         capital_mode = self.mode.capitalize()
-        if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
+        if cfg.architecture in MOT_ARCH and self.mode in [
+                'eval', 'test'
+        ] and cfg.metric not in ['COCO', 'VOC']:
             self.dataset = self.cfg['{}MOTDataset'.format(
             self.dataset = self.cfg['{}MOTDataset'.format(
                 capital_mode)] = create('{}MOTDataset'.format(capital_mode))()
                 capital_mode)] = create('{}MOTDataset'.format(capital_mode))()
         else:
         else:
@@ -95,12 +99,12 @@ class Trainer(object):
                 self.dataset, cfg.worker_num)
                 self.dataset, cfg.worker_num)
 
 
         if cfg.architecture == 'JDE' and self.mode == 'train':
         if cfg.architecture == 'JDE' and self.mode == 'train':
-            cfg['JDEEmbeddingHead'][
+            self.cfg['JDEEmbeddingHead'][
                 'num_identities'] = self.dataset.num_identities_dict[0]
                 'num_identities'] = self.dataset.num_identities_dict[0]
             # JDE only support single class MOT now.
             # JDE only support single class MOT now.
 
 
         if cfg.architecture == 'FairMOT' and self.mode == 'train':
         if cfg.architecture == 'FairMOT' and self.mode == 'train':
-            cfg['FairMOTEmbeddingHead'][
+            self.cfg['FairMOTEmbeddingHead'][
                 'num_identities_dict'] = self.dataset.num_identities_dict
                 'num_identities_dict'] = self.dataset.num_identities_dict
             # FairMOT support single class and multi-class MOT now.
             # FairMOT support single class and multi-class MOT now.
 
 
@@ -136,17 +140,30 @@ class Trainer(object):
         if self.mode == 'eval':
         if self.mode == 'eval':
             if cfg.architecture == 'FairMOT':
             if cfg.architecture == 'FairMOT':
                 self.loader = create('EvalMOTReader')(self.dataset, 0)
                 self.loader = create('EvalMOTReader')(self.dataset, 0)
+            elif cfg.architecture == "METRO_Body":
+                reader_name = '{}Reader'.format(self.mode.capitalize())
+                self.loader = create(reader_name)(self.dataset, cfg.worker_num)
             else:
             else:
                 self._eval_batch_sampler = paddle.io.BatchSampler(
                 self._eval_batch_sampler = paddle.io.BatchSampler(
                     self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
                     self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
                 reader_name = '{}Reader'.format(self.mode.capitalize())
                 reader_name = '{}Reader'.format(self.mode.capitalize())
                 # If metric is VOC, need to be set collate_batch=False.
                 # If metric is VOC, need to be set collate_batch=False.
                 if cfg.metric == 'VOC':
                 if cfg.metric == 'VOC':
-                    cfg[reader_name]['collate_batch'] = False
+                    self.cfg[reader_name]['collate_batch'] = False
                 self.loader = create(reader_name)(self.dataset, cfg.worker_num,
                 self.loader = create(reader_name)(self.dataset, cfg.worker_num,
                                                   self._eval_batch_sampler)
                                                   self._eval_batch_sampler)
         # TestDataset build after user set images, skip loader creation here
         # TestDataset build after user set images, skip loader creation here
 
 
+        # get Params
+        print_params = self.cfg.get('print_params', False)
+        if print_params:
+            params = sum([
+                p.numel() for n, p in self.model.named_parameters()
+                if all([x not in n for x in ['_mean', '_variance', 'aux_']])
+            ])  # exclude BatchNorm running status
+            logger.info('Model Params : {} M.'.format((params / 1e6).numpy()[
+                0]))
+
         # build optimizer in train mode
         # build optimizer in train mode
         if self.mode == 'train':
         if self.mode == 'train':
             steps_per_epoch = len(self.loader)
             steps_per_epoch = len(self.loader)
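
A hedged sketch of the optional parameter count enabled by print_params: True above. BatchNorm running statistics also show up in named_parameters() in Paddle, which is why names containing _mean/_variance (and auxiliary-head aux_ names) are filtered out before summing:

import paddle

model = paddle.nn.Sequential(paddle.nn.Conv2D(3, 8, 3), paddle.nn.BatchNorm2D(8))
params = sum(
    p.numel() for n, p in model.named_parameters()
    if all(x not in n for x in ['_mean', '_variance', 'aux_']))
print('Model Params : {} M.'.format(float(params) / 1e6))
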
@@ -172,12 +189,14 @@ class Trainer(object):
             ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
             ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
             cycle_epoch = self.cfg.get('cycle_epoch', -1)
             cycle_epoch = self.cfg.get('cycle_epoch', -1)
             ema_black_list = self.cfg.get('ema_black_list', None)
             ema_black_list = self.cfg.get('ema_black_list', None)
+            ema_filter_no_grad = self.cfg.get('ema_filter_no_grad', False)
             self.ema = ModelEMA(
             self.ema = ModelEMA(
                 self.model,
                 self.model,
                 decay=ema_decay,
                 decay=ema_decay,
                 ema_decay_type=ema_decay_type,
                 ema_decay_type=ema_decay_type,
                 cycle_epoch=cycle_epoch,
                 cycle_epoch=cycle_epoch,
-                ema_black_list=ema_black_list)
+                ema_black_list=ema_black_list,
+                ema_filter_no_grad=ema_filter_no_grad)
 
 
         self._nranks = dist.get_world_size()
         self._nranks = dist.get_world_size()
         self._local_rank = dist.get_rank()
         self._local_rank = dist.get_rank()
@@ -342,6 +361,13 @@ class Trainer(object):
                     self.cfg.save_dir,
                     self.cfg.save_dir,
                     save_prediction_only=save_prediction_only)
                     save_prediction_only=save_prediction_only)
             ]
             ]
+        elif self.cfg.metric == 'Pose3DEval':
+            save_prediction_only = self.cfg.get('save_prediction_only', False)
+            self._metrics = [
+                Pose3DEval(
+                    self.cfg.save_dir,
+                    save_prediction_only=save_prediction_only)
+            ]
         elif self.cfg.metric == 'MOTDet':
         elif self.cfg.metric == 'MOTDet':
             self._metrics = [JDEDetMetric(), ]
             self._metrics = [JDEDetMetric(), ]
         else:
         else:
@@ -378,7 +404,8 @@ class Trainer(object):
     def load_weights_sde(self, det_weights, reid_weights):
     def load_weights_sde(self, det_weights, reid_weights):
         if self.model.detector:
         if self.model.detector:
             load_weight(self.model.detector, det_weights)
             load_weight(self.model.detector, det_weights)
-            load_weight(self.model.reid, reid_weights)
+            if self.model.reid:
+                load_weight(self.model.reid, reid_weights)
         else:
         else:
             load_weight(self.model.reid, reid_weights)
             load_weight(self.model.reid, reid_weights)
 
 
@@ -400,15 +427,19 @@ class Trainer(object):
                 "EvalDataset")()
                 "EvalDataset")()
 
 
         model = self.model
         model = self.model
-        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
-                   self.cfg.use_gpu and self._nranks > 1)
+        if self.cfg.get('to_static', False):
+            model = apply_to_static(self.cfg, model)
+        sync_bn = (
+            getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
+            (self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu) and
+            self._nranks > 1)
         if sync_bn:
         if sync_bn:
             model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
             model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
 
 
         # enabel auto mixed precision mode
         # enabel auto mixed precision mode
         if self.use_amp:
         if self.use_amp:
             scaler = paddle.amp.GradScaler(
             scaler = paddle.amp.GradScaler(
-                enable=self.cfg.use_gpu or self.cfg.use_npu,
+                enable=self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu,
                 init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
                 init_loss_scaling=self.cfg.get('init_loss_scaling', 1024))
         # get distributed model
         # get distributed model
         if self.cfg.get('fleet', False):
         if self.cfg.get('fleet', False):
@@ -463,7 +494,8 @@ class Trainer(object):
                             DataParallel) and use_fused_allreduce_gradients:
                             DataParallel) and use_fused_allreduce_gradients:
                         with model.no_sync():
                         with model.no_sync():
                             with paddle.amp.auto_cast(
                             with paddle.amp.auto_cast(
-                                    enable=self.cfg.use_gpu,
+                                    enable=self.cfg.use_gpu or
+                                    self.cfg.use_npu or self.cfg.use_mlu,
                                     custom_white_list=self.custom_white_list,
                                     custom_white_list=self.custom_white_list,
                                     custom_black_list=self.custom_black_list,
                                     custom_black_list=self.custom_black_list,
                                     level=self.amp_level):
                                     level=self.amp_level):
@@ -477,7 +509,8 @@ class Trainer(object):
                             list(model.parameters()), None)
                             list(model.parameters()), None)
                     else:
                     else:
                         with paddle.amp.auto_cast(
                         with paddle.amp.auto_cast(
-                                enable=self.cfg.use_gpu,
+                                enable=self.cfg.use_gpu or self.cfg.use_npu or
+                                self.cfg.use_mlu,
                                 custom_white_list=self.custom_white_list,
                                 custom_white_list=self.custom_white_list,
                                 custom_black_list=self.custom_black_list,
                                 custom_black_list=self.custom_black_list,
                                 level=self.amp_level):
                                 level=self.amp_level):
@@ -527,7 +560,7 @@ class Trainer(object):
             if self.cfg.get('unstructured_prune'):
             if self.cfg.get('unstructured_prune'):
                 self.pruner.update_params()
                 self.pruner.update_params()
 
 
-            is_snapshot = (self._nranks < 2 or self._local_rank == 0) \
+            is_snapshot = (self._nranks < 2 or (self._local_rank == 0 or self.cfg.metric == "Pose3DEval")) \
                        and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
                        and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
             if is_snapshot and self.use_ema:
             if is_snapshot and self.use_ema:
                 # apply ema weight on model
                 # apply ema weight on model
@@ -548,10 +581,14 @@ class Trainer(object):
                     # If metric is VOC, need to be set collate_batch=False.
                     # If metric is VOC, need to be set collate_batch=False.
                     if self.cfg.metric == 'VOC':
                     if self.cfg.metric == 'VOC':
                         self.cfg['EvalReader']['collate_batch'] = False
                         self.cfg['EvalReader']['collate_batch'] = False
-                    self._eval_loader = create('EvalReader')(
-                        self._eval_dataset,
-                        self.cfg.worker_num,
-                        batch_sampler=self._eval_batch_sampler)
+                    if self.cfg.metric == "Pose3DEval":
+                        self._eval_loader = create('EvalReader')(
+                            self._eval_dataset, self.cfg.worker_num)
+                    else:
+                        self._eval_loader = create('EvalReader')(
+                            self._eval_dataset,
+                            self.cfg.worker_num,
+                            batch_sampler=self._eval_batch_sampler)
                 # if validation in training is enabled, metrics should be re-init
                 # if validation in training is enabled, metrics should be re-init
                 # Init_mark makes sure this code will only execute once
                 # Init_mark makes sure this code will only execute once
                 if validate and Init_mark == False:
                 if validate and Init_mark == False:
@@ -575,6 +612,7 @@ class Trainer(object):
         tic = time.time()
         tic = time.time()
         self._compose_callback.on_epoch_begin(self.status)
         self._compose_callback.on_epoch_begin(self.status)
         self.status['mode'] = 'eval'
         self.status['mode'] = 'eval'
+
         self.model.eval()
         self.model.eval()
         if self.cfg.get('print_flops', False):
         if self.cfg.get('print_flops', False):
             flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
             flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
@@ -586,7 +624,8 @@ class Trainer(object):
             # forward
             # forward
             if self.use_amp:
             if self.use_amp:
                 with paddle.amp.auto_cast(
                 with paddle.amp.auto_cast(
-                        enable=self.cfg.use_gpu,
+                        enable=self.cfg.use_gpu or self.cfg.use_npu or
+                        self.cfg.use_mlu,
                         custom_white_list=self.custom_white_list,
                         custom_black_list=self.custom_black_list,
                         level=self.amp_level):
@@ -617,6 +656,15 @@ class Trainer(object):
         self._reset_metrics()
 
     def evaluate(self):
+        # get distributed model
+        if self.cfg.get('fleet', False):
+            self.model = fleet.distributed_model(self.model)
+            self.optimizer = fleet.distributed_optimizer(self.optimizer)
+        elif self._nranks > 1:
+            find_unused_parameters = self.cfg[
+                'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
+            self.model = paddle.DataParallel(
+                self.model, find_unused_parameters=find_unused_parameters)
         with paddle.no_grad():
             self._eval_with_loader(self.loader)
 
@@ -644,7 +692,8 @@ class Trainer(object):
             # forward
             if self.use_amp:
                 with paddle.amp.auto_cast(
-                        enable=self.cfg.use_gpu,
+                        enable=self.cfg.use_gpu or self.cfg.use_npu or
+                        self.cfg.use_mlu,
                         custom_white_list=self.custom_white_list,
                         custom_black_list=self.custom_black_list,
                         level=self.amp_level):
@@ -722,11 +771,51 @@ class Trainer(object):
                       output_dir='output',
                       save_results=False,
                       visualize=True):
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
+
         self.dataset.set_slice_images(images, slice_size, overlap_ratio)
         loader = create('TestReader')(self.dataset, 0)
-
         imid2path = self.dataset.get_imid2path()
 
+        def setup_metrics_for_loader():
+            # mem
+            metrics = copy.deepcopy(self._metrics)
+            mode = self.mode
+            save_prediction_only = self.cfg[
+                'save_prediction_only'] if 'save_prediction_only' in self.cfg else None
+            output_eval = self.cfg[
+                'output_eval'] if 'output_eval' in self.cfg else None
+
+            # modify
+            self.mode = '_test'
+            self.cfg['save_prediction_only'] = True
+            self.cfg['output_eval'] = output_dir
+            self.cfg['imid2path'] = imid2path
+            self._init_metrics()
+
+            # restore
+            self.mode = mode
+            self.cfg.pop('save_prediction_only')
+            if save_prediction_only is not None:
+                self.cfg['save_prediction_only'] = save_prediction_only
+
+            self.cfg.pop('output_eval')
+            if output_eval is not None:
+                self.cfg['output_eval'] = output_eval
+
+            self.cfg.pop('imid2path')
+
+            _metrics = copy.deepcopy(self._metrics)
+            self._metrics = metrics
+
+            return _metrics
+
+        if save_results:
+            metrics = setup_metrics_for_loader()
+        else:
+            metrics = []
+
         anno_file = self.dataset.get_anno()
         clsid2catid, catid2name = get_categories(
             self.cfg.metric, anno_file=anno_file)
@@ -772,6 +861,9 @@ class Trainer(object):
                 merged_bboxs = []
                 data['im_id'] = data['ori_im_id']
 
+                for _m in metrics:
+                    _m.update(data, merged_results)
+
                 for key in ['im_shape', 'scale_factor', 'im_id']:
                     if isinstance(data, typing.Sequence):
                         merged_results[key] = data[0][key]
@@ -782,23 +874,36 @@ class Trainer(object):
                         merged_results[key] = value.numpy()
                 results.append(merged_results)
 
+        for _m in metrics:
+            _m.accumulate()
+            _m.reset()
+
         if visualize:
             for outs in results:
                 batch_res = get_infer_results(outs, clsid2catid)
                 bbox_num = outs['bbox_num']
+
                 start = 0
                 for i, im_id in enumerate(outs['im_id']):
                     image_path = imid2path[int(im_id)]
                     image = Image.open(image_path).convert('RGB')
                     image = ImageOps.exif_transpose(image)
                     self.status['original_image'] = np.array(image.copy())
+
                     end = start + bbox_num[i]
                     bbox_res = batch_res['bbox'][start:end] \
                             if 'bbox' in batch_res else None
-                    mask_res, segm_res, keypoint_res = None, None, None
+                    mask_res = batch_res['mask'][start:end] \
+                            if 'mask' in batch_res else None
+                    segm_res = batch_res['segm'][start:end] \
+                            if 'segm' in batch_res else None
+                    keypoint_res = batch_res['keypoint'][start:end] \
+                            if 'keypoint' in batch_res else None
+                    pose3d_res = batch_res['pose3d'][start:end] \
+                            if 'pose3d' in batch_res else None
                     image = visualize_results(
                         image, bbox_res, mask_res, segm_res, keypoint_res,
-                        int(im_id), catid2name, draw_threshold)
+                        pose3d_res, int(im_id), catid2name, draw_threshold)
                     self.status['result_image'] = np.array(image.copy())
                     if self._compose_callback:
                         self._compose_callback.on_step_end(self.status)
@@ -808,6 +913,7 @@ class Trainer(object):
                     logger.info("Detection bbox results save in {}".format(
                         save_name))
                     image.save(save_name, quality=95)
+
                     start = end
 
     def predict(self,
@@ -921,9 +1027,11 @@ class Trainer(object):
                             if 'segm' in batch_res else None
                     keypoint_res = batch_res['keypoint'][start:end] \
                             if 'keypoint' in batch_res else None
+                    pose3d_res = batch_res['pose3d'][start:end] \
+                            if 'pose3d' in batch_res else None
                     image = visualize_results(
                         image, bbox_res, mask_res, segm_res, keypoint_res,
-                        int(im_id), catid2name, draw_threshold)
+                        pose3d_res, int(im_id), catid2name, draw_threshold)
                     self.status['result_image'] = np.array(image.copy())
                     if self._compose_callback:
                         self._compose_callback.on_step_end(self.status)
@@ -935,6 +1043,7 @@ class Trainer(object):
                     image.save(save_name, quality=95)
 
                     start = end
+        return results
 
     def _get_save_image_name(self, output_dir, image_path):
         """
@@ -976,6 +1085,10 @@ class Trainer(object):
                 if hasattr(layer, 'convert_to_deploy'):
                     layer.convert_to_deploy()
 
+        if hasattr(self.cfg, 'export') and 'fuse_conv_bn' in self.cfg[
+                'export'] and self.cfg['export']['fuse_conv_bn']:
+            self.model = fuse_conv_bn(self.model)
+
         export_post_process = self.cfg['export'].get(
             'post_process', False) if hasattr(self.cfg, 'export') else True
         export_nms = self.cfg['export'].get('nms', False) if hasattr(
@@ -1045,12 +1158,12 @@ class Trainer(object):
         return static_model, pruned_input_spec
 
     def export(self, output_dir='output_inference'):
+        if hasattr(self.model, 'aux_neck'):
+            self.model.__delattr__('aux_neck')
+        if hasattr(self.model, 'aux_head'):
+            self.model.__delattr__('aux_head')
         self.model.eval()
 
-        if hasattr(self.cfg, 'export') and 'fuse_conv_bn' in self.cfg[
-                'export'] and self.cfg['export']['fuse_conv_bn']:
-            self.model = fuse_conv_bn(self.model)
-
         model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
         save_dir = os.path.join(output_dir, model_name)
         if not os.path.exists(save_dir):
@@ -1095,6 +1208,10 @@ class Trainer(object):
         logger.info("Export Post-Quant model and saved in {}".format(save_dir))
 
     def _flops(self, loader):
+        if hasattr(self.model, 'aux_neck'):
+            self.model.__delattr__('aux_neck')
+        if hasattr(self.model, 'aux_head'):
+            self.model.__delattr__('aux_head')
         self.model.eval()
         try:
             import paddleslim
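The setup_metrics_for_loader helper added to slice_predict above relies on a save-override-restore pattern: it stashes save_prediction_only and output_eval, overrides them (plus imid2path) so that _init_metrics builds prediction-saving metrics, and then restores the previous values. A minimal sketch of the same pattern as a reusable context manager, assuming only a dict-like cfg; the helper name and commented usage are illustrative, not part of this commit:

from contextlib import contextmanager

@contextmanager
def temporary_cfg(cfg, **overrides):
    """Temporarily set cfg keys, then restore (or remove) them afterwards."""
    _missing = object()
    saved = {k: cfg.get(k, _missing) for k in overrides}
    cfg.update(overrides)
    try:
        yield cfg
    finally:
        for k, old in saved.items():
            if old is _missing:
                cfg.pop(k, None)
            else:
                cfg[k] = old

# Illustrative use, mirroring setup_metrics_for_loader:
# with temporary_cfg(self.cfg, save_prediction_only=True,
#                    output_eval=output_dir, imid2path=imid2path):
#     self._init_metrics()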

+ 42 - 0
paddlers/models/ppdet/engine/trainer_cot.py

@@ -0,0 +1,42 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlers.models.ppdet.core.workspace import create
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger('ppdet.engine')
+
+from . import Trainer
+__all__ = ['TrainerCot']
+
+class TrainerCot(Trainer):
+    """
+    Trainer for label-cotuning
+    calculate the relationship between base_classes and novel_classes
+    """
+    def __init__(self, cfg, mode='train'):
+        super(TrainerCot, self).__init__(cfg, mode)
+        self.cotuning_init()
+
+    def cotuning_init(self):    
+        num_classes_novel = self.cfg['num_classes']
+
+        self.load_weights(self.cfg.pretrain_weights)
+
+        self.model.eval()
+        relationship = self.model.relationship_learning(self.loader, num_classes_novel)
+    
+        self.model.init_cot_head(relationship)
+        self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
+
+

+ 475 - 0
paddlers/models/ppdet/engine/trainer_ssod.py

@@ -0,0 +1,475 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import time
+import typing
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+import paddle.distributed as dist
+from paddle.distributed import fleet
+from paddlers.models.ppdet.optimizer import ModelEMA, SimpleModelEMA
+
+from paddlers.models.ppdet.core.workspace import create
+from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
+import paddlers.models.ppdet.utils.stats as stats
+from paddlers.models.ppdet.utils import profiler
+from paddlers.models.ppdet.modeling.ssod.utils import align_weak_strong_shape
+from .trainer import Trainer
+
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger('ppdet.engine')
+
+__all__ = ['Trainer_DenseTeacher']
+
+
+class Trainer_DenseTeacher(Trainer):
+    def __init__(self, cfg, mode='train'):
+        self.cfg = cfg
+        assert mode.lower() in ['train', 'eval', 'test'], \
+                "mode should be 'train', 'eval' or 'test'"
+        self.mode = mode.lower()
+        self.optimizer = None
+        self.is_loaded_weights = False
+        self.use_amp = self.cfg.get('amp', False)
+        self.amp_level = self.cfg.get('amp_level', 'O1')
+        self.custom_white_list = self.cfg.get('custom_white_list', None)
+        self.custom_black_list = self.cfg.get('custom_black_list', None)
+
+        # build data loader
+        capital_mode = self.mode.capitalize()
+        self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create(
+            '{}Dataset'.format(capital_mode))()
+
+        if self.mode == 'train':
+            self.dataset_unlabel = self.cfg['UnsupTrainDataset'] = create(
+                'UnsupTrainDataset')
+            self.loader = create('SemiTrainReader')(
+                self.dataset, self.dataset_unlabel, cfg.worker_num)
+
+        # build model
+        if 'model' not in self.cfg:
+            self.model = create(cfg.architecture)
+        else:
+            self.model = self.cfg.model
+            self.is_loaded_weights = True
+
+        # EvalDataset build with BatchSampler to evaluate in single device
+        # TODO: multi-device evaluate
+        if self.mode == 'eval':
+            self._eval_batch_sampler = paddle.io.BatchSampler(
+                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
+            # If metric is VOC, need to be set collate_batch=False.
+            if cfg.metric == 'VOC':
+                cfg['EvalReader']['collate_batch'] = False
+            self.loader = create('EvalReader')(self.dataset, cfg.worker_num,
+                                               self._eval_batch_sampler)
+        # TestDataset build after user set images, skip loader creation here
+
+        # build optimizer in train mode
+        if self.mode == 'train':
+            steps_per_epoch = len(self.loader)
+            if steps_per_epoch < 1:
+                logger.warning(
+                    "Samples in dataset are less than batch_size, please set smaller batch_size in TrainReader."
+                )
+            self.lr = create('LearningRate')(steps_per_epoch)
+            self.optimizer = create('OptimizerBuilder')(self.lr, self.model)
+
+            # Unstructured pruner is only enabled in the train mode.
+            if self.cfg.get('unstructured_prune'):
+                self.pruner = create('UnstructuredPruner')(self.model,
+                                                           steps_per_epoch)
+        if self.use_amp and self.amp_level == 'O2':
+            self.model, self.optimizer = paddle.amp.decorate(
+                models=self.model,
+                optimizers=self.optimizer,
+                level=self.amp_level)
+
+        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
+        if self.use_ema:
+            ema_decay = self.cfg.get('ema_decay', 0.9998)
+            ema_decay_type = self.cfg.get('ema_decay_type', 'threshold')
+            cycle_epoch = self.cfg.get('cycle_epoch', -1)
+            ema_black_list = self.cfg.get('ema_black_list', None)
+            self.ema = ModelEMA(
+                self.model,
+                decay=ema_decay,
+                ema_decay_type=ema_decay_type,
+                cycle_epoch=cycle_epoch,
+                ema_black_list=ema_black_list)
+            self.ema_start_iters = self.cfg.get('ema_start_iters', 0)
+
+        # simple_ema for SSOD
+        self.use_simple_ema = ('use_simple_ema' in cfg and
+                               cfg['use_simple_ema'])
+        if self.use_simple_ema:
+            self.use_ema = True
+            ema_decay = self.cfg.get('ema_decay', 0.9996)
+            self.ema = SimpleModelEMA(self.model, decay=ema_decay)
+            self.ema_start_iters = self.cfg.get('ema_start_iters', 0)
+
+        self._nranks = dist.get_world_size()
+        self._local_rank = dist.get_rank()
+
+        self.status = {}
+
+        self.start_epoch = 0
+        self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch
+
+        # initial default callbacks
+        self._init_callbacks()
+
+        # initial default metrics
+        self._init_metrics()
+        self._reset_metrics()
+
+    def load_weights(self, weights):
+        if self.is_loaded_weights:
+            return
+        self.start_epoch = 0
+        load_pretrain_weight(self.model, weights)
+        load_pretrain_weight(self.ema.model, weights)
+        logger.info("Load weights {} to start training for teacher and student".
+                    format(weights))
+
+    def resume_weights(self, weights, exchange=True):
+        # support Distill resume weights
+        if hasattr(self.model, 'student_model'):
+            self.start_epoch = load_weight(self.model.student_model, weights,
+                                           self.optimizer, exchange)
+        else:
+            self.start_epoch = load_weight(self.model, weights, self.optimizer,
+                                           self.ema
+                                           if self.use_ema else None, exchange)
+        logger.debug("Resume weights of epoch {}".format(self.start_epoch))
+
+    def train(self, validate=False):
+        self.semi_start_iters = self.cfg.get('semi_start_iters', 5000)
+        Init_mark = False
+        if validate:
+            self.cfg['EvalDataset'] = self.cfg.EvalDataset = create(
+                "EvalDataset")()
+
+        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
+                   self.cfg.use_gpu and self._nranks > 1)
+        if sync_bn:
+            self.model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(
+                self.model)
+
+        if self.cfg.get('fleet', False):
+            self.model = fleet.distributed_model(self.model)
+            self.optimizer = fleet.distributed_optimizer(self.optimizer)
+        elif self._nranks > 1:
+            find_unused_parameters = self.cfg[
+                'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
+            self.model = paddle.DataParallel(
+                self.model, find_unused_parameters=find_unused_parameters)
+            self.ema.model = paddle.DataParallel(
+                self.ema.model, find_unused_parameters=find_unused_parameters)
+
+        self.status.update({
+            'epoch_id': self.start_epoch,
+            'step_id': 0,
+            'steps_per_epoch': len(self.loader),
+            'exchange_save_model': True,
+        })
+        # Note: exchange_save_model
+        # in DenseTeacher SSOD, the teacher model will be higher, so exchange when saving pdparams
+
+        self.status['batch_time'] = stats.SmoothedValue(
+            self.cfg.log_iter, fmt='{avg:.4f}')
+        self.status['data_time'] = stats.SmoothedValue(
+            self.cfg.log_iter, fmt='{avg:.4f}')
+        self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
+
+        if self.cfg.get('print_flops', False):
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num)
+            self._flops(flops_loader)
+        profiler_options = self.cfg.get('profiler_options', None)
+        self._compose_callback.on_train_begin(self.status)
+
+        train_cfg = self.cfg.DenseTeacher['train_cfg']
+        concat_sup_data = train_cfg.get('concat_sup_data', True)
+
+        for param in self.ema.model.parameters():
+            param.stop_gradient = True
+
+        for epoch_id in range(self.start_epoch, self.cfg.epoch):
+            self.status['mode'] = 'train'
+            self.status['epoch_id'] = epoch_id
+            self._compose_callback.on_epoch_begin(self.status)
+            self.loader.dataset_label.set_epoch(epoch_id)
+            self.loader.dataset_unlabel.set_epoch(epoch_id)
+            iter_tic = time.time()
+            loss_dict = {
+                'loss': paddle.to_tensor([0]),
+                'loss_sup_sum': paddle.to_tensor([0]),
+                'loss_unsup_sum': paddle.to_tensor([0]),
+                'fg_sum': paddle.to_tensor([0]),
+            }
+            if self._nranks > 1:
+                for k in self.model._layers.get_loss_keys():
+                    loss_dict.update({k: paddle.to_tensor([0.])})
+                for k in self.model._layers.get_loss_keys():
+                    loss_dict.update({'distill_' + k: paddle.to_tensor([0.])})
+            else:
+                for k in self.model.get_loss_keys():
+                    loss_dict.update({k: paddle.to_tensor([0.])})
+                for k in self.model.get_loss_keys():
+                    loss_dict.update({'distill_' + k: paddle.to_tensor([0.])})
+
+            # Note: for step_id, data in enumerate(self.loader): # enumerate bug
+            for step_id in range(len(self.loader)):
+                data = next(self.loader)
+
+                self.model.train()
+                self.ema.model.eval()
+                data_sup_w, data_sup_s, data_unsup_w, data_unsup_s = data
+
+                self.status['data_time'].update(time.time() - iter_tic)
+                self.status['step_id'] = step_id
+                profiler.add_profiler_step(profiler_options)
+                self._compose_callback.on_step_begin(self.status)
+
+                if data_sup_w['image'].shape != data_sup_s['image'].shape:
+                    data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
+                                                                     data_sup_s)
+
+                data_sup_w['epoch_id'] = epoch_id
+                data_sup_s['epoch_id'] = epoch_id
+                if concat_sup_data:
+                    for k, v in data_sup_s.items():
+                        if k in ['epoch_id']:
+                            continue
+                        data_sup_s[k] = paddle.concat([v, data_sup_w[k]])
+                    loss_dict_sup = self.model(data_sup_s)
+                else:
+                    loss_dict_sup_w = self.model(data_sup_w)
+                    loss_dict_sup = self.model(data_sup_s)
+                    for k, v in loss_dict_sup_w.items():
+                        loss_dict_sup[k] = (loss_dict_sup[k] + v) * 0.5
+
+                losses_sup = loss_dict_sup['loss'] * train_cfg['sup_weight']
+                losses_sup.backward()
+
+                losses = losses_sup.detach()
+                loss_dict.update(loss_dict_sup)
+                loss_dict.update({'loss_sup_sum': loss_dict['loss']})
+
+                curr_iter = len(self.loader) * epoch_id + step_id
+                st_iter = self.semi_start_iters
+                if curr_iter == st_iter:
+                    logger.info("***" * 30)
+                    logger.info('Semi starting ...')
+                    logger.info("***" * 30)
+                if curr_iter > st_iter:
+                    unsup_weight = train_cfg['unsup_weight']
+                    if train_cfg['suppress'] == 'linear':
+                        tar_iter = st_iter * 2
+                        if curr_iter <= tar_iter:
+                            unsup_weight *= (curr_iter - st_iter) / st_iter
+                    elif train_cfg['suppress'] == 'exp':
+                        tar_iter = st_iter + 2000
+                        if curr_iter <= tar_iter:
+                            scale = np.exp((curr_iter - tar_iter) / 1000)
+                            unsup_weight *= scale
+                    elif train_cfg['suppress'] == 'step':
+                        tar_iter = st_iter * 2
+                        if curr_iter <= tar_iter:
+                            unsup_weight *= 0.25
+                    else:
+                        raise ValueError
+
+                    if data_unsup_w['image'].shape != data_unsup_s[
+                            'image'].shape:
+                        data_unsup_w, data_unsup_s = align_weak_strong_shape(
+                            data_unsup_w, data_unsup_s)
+
+                    data_unsup_w['epoch_id'] = epoch_id
+                    data_unsup_s['epoch_id'] = epoch_id
+
+                    data_unsup_s['get_data'] = True
+                    student_preds = self.model(data_unsup_s)
+
+                    with paddle.no_grad():
+                        data_unsup_w['is_teacher'] = True
+                        teacher_preds = self.ema.model(data_unsup_w)
+
+                    train_cfg['curr_iter'] = curr_iter
+                    train_cfg['st_iter'] = st_iter
+                    if self._nranks > 1:
+                        loss_dict_unsup = self.model._layers.get_ssod_loss(
+                            student_preds, teacher_preds, train_cfg)
+                    else:
+                        loss_dict_unsup = self.model.get_ssod_loss(
+                            student_preds, teacher_preds, train_cfg)
+
+                    fg_num = loss_dict_unsup["fg_sum"]
+                    del loss_dict_unsup["fg_sum"]
+                    distill_weights = train_cfg['loss_weight']
+                    loss_dict_unsup = {
+                        k: v * distill_weights[k]
+                        for k, v in loss_dict_unsup.items()
+                    }
+
+                    losses_unsup = sum([
+                        metrics_value
+                        for metrics_value in loss_dict_unsup.values()
+                    ]) * unsup_weight
+                    losses_unsup.backward()
+
+                    loss_dict.update(loss_dict_unsup)
+                    loss_dict.update({'loss_unsup_sum': losses_unsup})
+                    losses += losses_unsup.detach()
+                    loss_dict.update({"fg_sum": fg_num})
+                    loss_dict['loss'] = losses
+
+                self.optimizer.step()
+                curr_lr = self.optimizer.get_lr()
+                self.lr.step()
+                self.optimizer.clear_grad()
+                self.status['learning_rate'] = curr_lr
+                if self._nranks < 2 or self._local_rank == 0:
+                    self.status['training_staus'].update(loss_dict)
+
+                self.status['batch_time'].update(time.time() - iter_tic)
+                self._compose_callback.on_step_end(self.status)
+                # Note: ema_start_iters
+                if self.use_ema and curr_iter == self.ema_start_iters:
+                    logger.info("***" * 30)
+                    logger.info('EMA starting ...')
+                    logger.info("***" * 30)
+                    self.ema.update(self.model, decay=0)
+                elif self.use_ema and curr_iter > self.ema_start_iters:
+                    self.ema.update(self.model)
+                iter_tic = time.time()
+
+            is_snapshot = (self._nranks < 2 or self._local_rank == 0) \
+                       and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1)
+            if is_snapshot and self.use_ema:
+                # apply ema weight on model
+                weight = copy.deepcopy(self.ema.model.state_dict())
+                for k, v in weight.items():
+                    if paddle.is_floating_point(v):
+                        weight[k].stop_gradient = True
+                self.status['weight'] = weight
+
+            self._compose_callback.on_epoch_end(self.status)
+
+            if validate and is_snapshot:
+                if not hasattr(self, '_eval_loader'):
+                    # build evaluation dataset and loader
+                    self._eval_dataset = self.cfg.EvalDataset
+                    self._eval_batch_sampler = \
+                        paddle.io.BatchSampler(
+                            self._eval_dataset,
+                            batch_size=self.cfg.EvalReader['batch_size'])
+                    # If metric is VOC, need to be set collate_batch=False.
+                    if self.cfg.metric == 'VOC':
+                        self.cfg['EvalReader']['collate_batch'] = False
+                    self._eval_loader = create('EvalReader')(
+                        self._eval_dataset,
+                        self.cfg.worker_num,
+                        batch_sampler=self._eval_batch_sampler)
+                # if validation in training is enabled, metrics should be re-init
+                # Init_mark makes sure this code will only execute once
+                if validate and Init_mark == False:
+                    Init_mark = True
+                    self._init_metrics(validate=validate)
+                    self._reset_metrics()
+
+                with paddle.no_grad():
+                    self.status['save_best_model'] = True
+                    self._eval_with_loader(self._eval_loader)
+
+            if is_snapshot and self.use_ema:
+                self.status.pop('weight')
+
+        self._compose_callback.on_train_end(self.status)
+
+    def evaluate(self):
+        # get distributed model
+        if self.cfg.get('fleet', False):
+            self.model = fleet.distributed_model(self.model)
+            self.optimizer = fleet.distributed_optimizer(self.optimizer)
+        elif self._nranks > 1:
+            find_unused_parameters = self.cfg[
+                'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
+            self.model = paddle.DataParallel(
+                self.model, find_unused_parameters=find_unused_parameters)
+        with paddle.no_grad():
+            self._eval_with_loader(self.loader)
+
+    def _eval_with_loader(self, loader):
+        sample_num = 0
+        tic = time.time()
+        self._compose_callback.on_epoch_begin(self.status)
+        self.status['mode'] = 'eval'
+
+        test_cfg = self.cfg.DenseTeacher['test_cfg']
+        if test_cfg['inference_on'] == 'teacher':
+            logger.info("***** teacher model evaluating *****")
+            eval_model = self.ema.model
+        else:
+            logger.info("***** student model evaluating *****")
+            eval_model = self.model
+
+        eval_model.eval()
+        if self.cfg.get('print_flops', False):
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
+            self._flops(flops_loader)
+        for step_id, data in enumerate(loader):
+            self.status['step_id'] = step_id
+            self._compose_callback.on_step_begin(self.status)
+            # forward
+            if self.use_amp:
+                with paddle.amp.auto_cast(
+                        enable=self.cfg.use_gpu or self.cfg.use_mlu,
+                        custom_white_list=self.custom_white_list,
+                        custom_black_list=self.custom_black_list,
+                        level=self.amp_level):
+                    outs = eval_model(data)
+            else:
+                outs = eval_model(data)
+
+            # update metrics
+            for metric in self._metrics:
+                metric.update(data, outs)
+
+            # multi-scale inputs: all inputs have same im_id
+            if isinstance(data, typing.Sequence):
+                sample_num += data[0]['im_id'].numpy().shape[0]
+            else:
+                sample_num += data['im_id'].numpy().shape[0]
+            self._compose_callback.on_step_end(self.status)
+
+        self.status['sample_num'] = sample_num
+        self.status['cost_time'] = time.time() - tic
+
+        # accumulate metric to log out
+        for metric in self._metrics:
+            metric.accumulate()
+            metric.log()
+        self._compose_callback.on_epoch_end(self.status)
+        # reset metric states for metric may performed multiple times
+        self._reset_metrics()

+ 18 - 17
paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cc → paddlers/models/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cc

@@ -13,14 +13,14 @@
 // limitations under the License.
 //
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
 
+#include "../rbox_iou/rbox_iou_utils.h"
 #include "paddle/extension.h"
-#include "rbox_iou_op.h"
 
 template <typename T>
 void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
-                            const T *rbox2_data_ptr, T *output_data_ptr) {
+                                 const T *rbox2_data_ptr, T *output_data_ptr) {
 
   int i;
   for (i = 0; i < rbox_num; i++) {
@@ -30,42 +30,43 @@ void matched_rbox_iou_cpu_kernel(const int rbox_num, const T *rbox1_data_ptr,
 }
 
 #define CHECK_INPUT_CPU(x)                                                     \
-  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+  PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
 
-std::vector<paddle::Tensor> MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
-                                                 const paddle::Tensor &rbox2) {
+std::vector<paddle::Tensor>
+MatchedRboxIouCPUForward(const paddle::Tensor &rbox1,
+                         const paddle::Tensor &rbox2) {
   CHECK_INPUT_CPU(rbox1);
   CHECK_INPUT_CPU(rbox2);
   PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
 
   auto rbox_num = rbox1.shape()[0];
-  auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox_num});
+  auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::CPUPlace());
 
-  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rotated_iou_cpu_kernel", ([&] {
+  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "matched_rbox_iou_cpu_kernel", ([&] {
                                matched_rbox_iou_cpu_kernel<data_t>(
                                    rbox_num, rbox1.data<data_t>(),
-                                   rbox2.data<data_t>(),
-                                   output.mutable_data<data_t>());
+                                   rbox2.data<data_t>(), output.data<data_t>());
                              }));
 
   return {output};
 }
 
 #ifdef PADDLE_WITH_CUDA
-std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
-                                                  const paddle::Tensor &rbox2);
+std::vector<paddle::Tensor>
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                          const paddle::Tensor &rbox2);
 #endif
 
 #define CHECK_INPUT_SAME(x1, x2)                                               \
   PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
 
 std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
-                                              const paddle::Tensor &rbox2) {
+                                                  const paddle::Tensor &rbox2) {
   CHECK_INPUT_SAME(rbox1, rbox2);
-  if (rbox1.place() == paddle::PlaceType::kCPU) {
+  if (rbox1.is_cpu()) {
     return MatchedRboxIouCPUForward(rbox1, rbox2);
 #ifdef PADDLE_WITH_CUDA
-  } else if (rbox1.place() == paddle::PlaceType::kGPU) {
+  } else if (rbox1.is_gpu()) {
     return MatchedRboxIouCUDAForward(rbox1, rbox2);
 #endif
   }
@@ -73,12 +74,12 @@ std::vector<paddle::Tensor> MatchedRboxIouForward(const paddle::Tensor &rbox1,
 
 std::vector<std::vector<int64_t>>
 MatchedRboxIouInferShape(std::vector<int64_t> rbox1_shape,
-                     std::vector<int64_t> rbox2_shape) {
+                         std::vector<int64_t> rbox2_shape) {
   return {{rbox1_shape[0]}};
 }
 
 std::vector<paddle::DataType> MatchedRboxIouInferDtype(paddle::DataType t1,
-                                                   paddle::DataType t2) {
+                                                       paddle::DataType t2) {
   return {t1};
 }
 

+ 9 - 14
paddlers/models/ppdet/ext_op/csrc/rbox_iou/matched_rbox_iou_op.cu → paddlers/models/ppdet/ext_op/csrc/matched_rbox_iou/matched_rbox_iou.cu

@@ -13,21 +13,15 @@
 // limitations under the License.
 //
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
 
+#include "../rbox_iou/rbox_iou_utils.h"
 #include "paddle/extension.h"
-#include "rbox_iou_op.h"
-
-/**
-   Computes ceil(a / b)
-*/
-
-static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
 
 template <typename T>
 __global__ void
 matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
-                        const T *rbox2_data_ptr, T *output_data_ptr) {
+                             const T *rbox2_data_ptr, T *output_data_ptr) {
   for (int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < rbox_num;
        tid += blockDim.x * gridDim.x) {
     output_data_ptr[tid] =
@@ -36,17 +30,18 @@ matched_rbox_iou_cuda_kernel(const int rbox_num, const T *rbox1_data_ptr,
 }
 
 #define CHECK_INPUT_GPU(x)                                                     \
-  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+  PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
 
-std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
-                                                  const paddle::Tensor &rbox2) {
+std::vector<paddle::Tensor>
+MatchedRboxIouCUDAForward(const paddle::Tensor &rbox1,
+                          const paddle::Tensor &rbox2) {
   CHECK_INPUT_GPU(rbox1);
   CHECK_INPUT_GPU(rbox2);
   PD_CHECK(rbox1.shape()[0] == rbox2.shape()[0], "inputs must be same dim");
 
   auto rbox_num = rbox1.shape()[0];
 
-  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox_num});
+  auto output = paddle::empty({rbox_num}, rbox1.dtype(), paddle::GPUPlace());
 
   const int thread_per_block = 512;
   const int block_per_grid = CeilDiv(rbox_num, thread_per_block);
@@ -56,7 +51,7 @@ std::vector<paddle::Tensor> MatchedRboxIouCUDAForward(const paddle::Tensor &rbox
         matched_rbox_iou_cuda_kernel<
             data_t><<<block_per_grid, thread_per_block, 0, rbox1.stream()>>>(
             rbox_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
-            output.mutable_data<data_t>());
+            output.data<data_t>());
       }));
 
   return {output};

+ 121 - 0
paddlers/models/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cc

@@ -0,0 +1,121 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "../rbox_iou/rbox_iou_utils.h"
+#include "paddle/extension.h"
+
+template <typename T>
+void nms_rotated_cpu_kernel(const T *boxes_data, const float threshold,
+                            const int64_t num_boxes, int64_t *num_keep_boxes,
+                            int64_t *output_data) {
+
+  int num_masks = CeilDiv(num_boxes, 64);
+  std::vector<int64_t> masks(num_masks, 0);
+  for (int64_t i = 0; i < num_boxes; ++i) {
+    if (masks[i / 64] & 1ULL << (i % 64))
+      continue;
+    T box_1[5];
+    for (int k = 0; k < 5; ++k) {
+      box_1[k] = boxes_data[i * 5 + k];
+    }
+    for (int64_t j = i + 1; j < num_boxes; ++j) {
+      if (masks[j / 64] & 1ULL << (j % 64))
+        continue;
+      T box_2[5];
+      for (int k = 0; k < 5; ++k) {
+        box_2[k] = boxes_data[j * 5 + k];
+      }
+      if (rbox_iou_single<T>(box_1, box_2) > threshold) {
+        masks[j / 64] |= 1ULL << (j % 64);
+      }
+    }
+  }
+  int64_t output_data_idx = 0;
+  for (int64_t i = 0; i < num_boxes; ++i) {
+    if (masks[i / 64] & 1ULL << (i % 64))
+      continue;
+    output_data[output_data_idx++] = i;
+  }
+  *num_keep_boxes = output_data_idx;
+  for (; output_data_idx < num_boxes; ++output_data_idx) {
+    output_data[output_data_idx] = 0;
+  }
+}
+
+#define CHECK_INPUT_CPU(x)                                                     \
+  PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> NMSRotatedCPUForward(const paddle::Tensor &boxes,
+                                                 const paddle::Tensor &scores,
+                                                 float threshold) {
+  CHECK_INPUT_CPU(boxes);
+  CHECK_INPUT_CPU(scores);
+
+  auto num_boxes = boxes.shape()[0];
+
+  auto order_t =
+      std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true));
+  auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0);
+
+  auto keep =
+      paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace());
+  int64_t num_keep_boxes = 0;
+
+  PD_DISPATCH_FLOATING_TYPES(boxes.type(), "nms_rotated_cpu_kernel", ([&] {
+                               nms_rotated_cpu_kernel<data_t>(
+                                   boxes_sorted.data<data_t>(), threshold,
+                                   num_boxes, &num_keep_boxes,
+                                   keep.data<int64_t>());
+                             }));
+
+  keep = keep.slice(0, num_keep_boxes);
+  return {paddle::gather(order_t, keep, /* axis=*/0)};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> NMSRotatedCUDAForward(const paddle::Tensor &boxes,
+                                                  const paddle::Tensor &scores,
+                                                  float threshold);
+#endif
+
+std::vector<paddle::Tensor> NMSRotatedForward(const paddle::Tensor &boxes,
+                                              const paddle::Tensor &scores,
+                                              float threshold) {
+  if (boxes.is_cpu()) {
+    return NMSRotatedCPUForward(boxes, scores, threshold);
+#ifdef PADDLE_WITH_CUDA
+  } else if (boxes.is_gpu()) {
+    return NMSRotatedCUDAForward(boxes, scores, threshold);
+#endif
+  }
+}
+
+std::vector<std::vector<int64_t>>
+NMSRotatedInferShape(std::vector<int64_t> boxes_shape,
+                     std::vector<int64_t> scores_shape) {
+  return {{-1}};
+}
+
+std::vector<paddle::DataType> NMSRotatedInferDtype(paddle::DataType t1,
+                                                   paddle::DataType t2) {
+  return {paddle::DataType::INT64};
+}
+
+PD_BUILD_OP(nms_rotated)
+    .Inputs({"Boxes", "Scores"})
+    .Outputs({"Output"})
+    .Attrs({"threshold: float"})
+    .SetKernelFn(PD_KERNEL(NMSRotatedForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(NMSRotatedInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(NMSRotatedInferDtype));

+ 96 - 0
paddlers/models/ppdet/ext_op/csrc/nms_rotated/nms_rotated.cu

@@ -0,0 +1,96 @@
+//   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "../rbox_iou/rbox_iou_utils.h"
+#include "paddle/extension.h"
+
+static const int64_t threadsPerBlock = sizeof(int64_t) * 8;
+
+template <typename T>
+__global__ void
+nms_rotated_cuda_kernel(const T *boxes_data, const float threshold,
+                        const int64_t num_boxes, int64_t *masks) {
+  auto raw_start = blockIdx.y;
+  auto col_start = blockIdx.x;
+  if (raw_start > col_start)
+    return;
+  const int raw_last_storage =
+      min(num_boxes - raw_start * threadsPerBlock, threadsPerBlock);
+  const int col_last_storage =
+      min(num_boxes - col_start * threadsPerBlock, threadsPerBlock);
+  if (threadIdx.x < raw_last_storage) {
+    int64_t mask = 0;
+    auto current_box_idx = raw_start * threadsPerBlock + threadIdx.x;
+    const T *current_box = boxes_data + current_box_idx * 5;
+    for (int i = 0; i < col_last_storage; ++i) {
+      const T *target_box = boxes_data + (col_start * threadsPerBlock + i) * 5;
+      if (rbox_iou_single<T>(current_box, target_box) > threshold) {
+        mask |= 1ULL << i;
+      }
+    }
+    const int blocks_per_line = CeilDiv(num_boxes, threadsPerBlock);
+    masks[current_box_idx * blocks_per_line + col_start] = mask;
+  }
+}
+
+#define CHECK_INPUT_GPU(x)                                                     \
+  PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
+
+std::vector<paddle::Tensor> NMSRotatedCUDAForward(const paddle::Tensor &boxes,
+                                                  const paddle::Tensor &scores,
+                                                  float threshold) {
+  CHECK_INPUT_GPU(boxes);
+  CHECK_INPUT_GPU(scores);
+
+  auto num_boxes = boxes.shape()[0];
+  auto order_t =
+      std::get<1>(paddle::argsort(scores, /* axis=*/0, /* descending=*/true));
+  auto boxes_sorted = paddle::gather(boxes, order_t, /* axis=*/0);
+
+  const auto blocks_per_line = CeilDiv(num_boxes, threadsPerBlock);
+  dim3 block(threadsPerBlock);
+  dim3 grid(blocks_per_line, blocks_per_line);
+  auto mask_dev = paddle::empty({num_boxes * blocks_per_line},
+                                paddle::DataType::INT64, paddle::GPUPlace());
+
+  PD_DISPATCH_FLOATING_TYPES(
+      boxes.type(), "nms_rotated_cuda_kernel", ([&] {
+        nms_rotated_cuda_kernel<data_t><<<grid, block, 0, boxes.stream()>>>(
+            boxes_sorted.data<data_t>(), threshold, num_boxes,
+            mask_dev.data<int64_t>());
+      }));
+
+  auto mask_host = mask_dev.copy_to(paddle::CPUPlace(), true);
+  auto keep_host =
+      paddle::empty({num_boxes}, paddle::DataType::INT64, paddle::CPUPlace());
+  int64_t *keep_host_ptr = keep_host.data<int64_t>();
+  int64_t *mask_host_ptr = mask_host.data<int64_t>();
+  std::vector<int64_t> remv(blocks_per_line);
+  int64_t last_box_num = 0;
+  for (int64_t i = 0; i < num_boxes; ++i) {
+    auto remv_element_id = i / threadsPerBlock;
+    auto remv_bit_id = i % threadsPerBlock;
+    if (!(remv[remv_element_id] & 1ULL << remv_bit_id)) {
+      keep_host_ptr[last_box_num++] = i;
+      int64_t *current_mask = mask_host_ptr + i * blocks_per_line;
+      for (auto j = remv_element_id; j < blocks_per_line; ++j) {
+        remv[j] |= current_mask[j];
+      }
+    }
+  }
+
+  keep_host = keep_host.slice(0, last_box_num);
+  auto keep_dev = keep_host.copy_to(paddle::GPUPlace(), true);
+  return {paddle::gather(order_t, keep_dev, /* axis=*/0)};
+}

+ 95 - 0
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc

@@ -0,0 +1,95 @@
+//   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The code is based on
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
+
+#include "paddle/extension.h"
+#include "rbox_iou_utils.h"
+
+template <typename T>
+void rbox_iou_cpu_kernel(const int rbox1_num, const int rbox2_num,
+                         const T *rbox1_data_ptr, const T *rbox2_data_ptr,
+                         T *output_data_ptr) {
+
+  int i, j;
+  for (i = 0; i < rbox1_num; i++) {
+    for (j = 0; j < rbox2_num; j++) {
+      int offset = i * rbox2_num + j;
+      output_data_ptr[offset] =
+          rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
+    }
+  }
+}
+
+#define CHECK_INPUT_CPU(x)                                                     \
+  PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor &rbox1,
+                                              const paddle::Tensor &rbox2) {
+  CHECK_INPUT_CPU(rbox1);
+  CHECK_INPUT_CPU(rbox2);
+
+  auto rbox1_num = rbox1.shape()[0];
+  auto rbox2_num = rbox2.shape()[0];
+
+  auto output =
+      paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::CPUPlace());
+
+  PD_DISPATCH_FLOATING_TYPES(rbox1.type(), "rbox_iou_cpu_kernel", ([&] {
+                               rbox_iou_cpu_kernel<data_t>(
+                                   rbox1_num, rbox2_num, rbox1.data<data_t>(),
+                                   rbox2.data<data_t>(), output.data<data_t>());
+                             }));
+
+  return {output};
+}
+
+#ifdef PADDLE_WITH_CUDA
+std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
+                                               const paddle::Tensor &rbox2);
+#endif
+
+#define CHECK_INPUT_SAME(x1, x2)                                               \
+  PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
+
+std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor &rbox1,
+                                           const paddle::Tensor &rbox2) {
+  CHECK_INPUT_SAME(rbox1, rbox2);
+  if (rbox1.is_cpu()) {
+    return RboxIouCPUForward(rbox1, rbox2);
+#ifdef PADDLE_WITH_CUDA
+  } else if (rbox1.is_gpu()) {
+    return RboxIouCUDAForward(rbox1, rbox2);
+#endif
+  }
+}
+
+std::vector<std::vector<int64_t>>
+RboxIouInferShape(std::vector<int64_t> rbox1_shape,
+                  std::vector<int64_t> rbox2_shape) {
+  return {{rbox1_shape[0], rbox2_shape[0]}};
+}
+
+std::vector<paddle::DataType> RboxIouInferDtype(paddle::DataType t1,
+                                                paddle::DataType t2) {
+  return {t1};
+}
+
+PD_BUILD_OP(rbox_iou)
+    .Inputs({"RBox1", "RBox2"})
+    .Outputs({"Output"})
+    .SetKernelFn(PD_KERNEL(RboxIouForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(RboxIouInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(RboxIouInferDtype));
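Note: once compiled, the op registered by PD_BUILD_OP(rbox_iou) is exposed to Python under the same name. A minimal usage sketch (not part of this patch; the JIT-compile source paths and the CUDA toolchain are assumptions about the local environment):

import paddle
from paddle.utils.cpp_extension import load

# JIT-compile the custom op; building the .cu file assumes a CUDA toolchain is available.
custom_ops = load(
    name="rbox_iou_ext",
    sources=[
        "paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cc",
        "paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu",
    ])

# Each rotated box is [cx, cy, w, h, angle]; inputs are (N, 5) and (M, 5).
rbox1 = paddle.rand([4, 5], dtype="float32")
rbox2 = paddle.rand([6, 5], dtype="float32")
iou = custom_ops.rbox_iou(rbox1, rbox2)  # pairwise IoU matrix of shape (N, M)
print(iou.shape)  # [4, 6]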

+ 6 - 11
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cu → paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou.cu

@@ -13,21 +13,15 @@
 // limitations under the License.
 // limitations under the License.
 //
 //
 // The code is based on
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
 
 
 #include "paddle/extension.h"
 #include "paddle/extension.h"
-#include "rbox_iou_op.h"
+#include "rbox_iou_utils.h"
 
 
 // 2D block with 32 * 16 = 512 threads per block
 // 2D block with 32 * 16 = 512 threads per block
 const int BLOCK_DIM_X = 32;
 const int BLOCK_DIM_X = 32;
 const int BLOCK_DIM_Y = 16;
 const int BLOCK_DIM_Y = 16;
 
 
-/**
-   Computes ceil(a / b)
-*/
-
-static inline int CeilDiv(const int a, const int b) { return (a + b - 1) / b; }
-
 template <typename T>
 template <typename T>
 __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
 __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
                                      const T *rbox1_data_ptr,
                                      const T *rbox1_data_ptr,
@@ -85,7 +79,7 @@ __global__ void rbox_iou_cuda_kernel(const int rbox1_num, const int rbox2_num,
 }
 }
 
 
 #define CHECK_INPUT_GPU(x)                                                     \
 #define CHECK_INPUT_GPU(x)                                                     \
-  PD_CHECK(x.place() == paddle::PlaceType::kGPU, #x " must be a GPU Tensor.")
+  PD_CHECK(x.is_gpu(), #x " must be a GPU Tensor.")
 
 
 std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
 std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
                                                const paddle::Tensor &rbox2) {
                                                const paddle::Tensor &rbox2) {
@@ -95,7 +89,8 @@ std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
   auto rbox1_num = rbox1.shape()[0];
   auto rbox1_num = rbox1.shape()[0];
   auto rbox2_num = rbox2.shape()[0];
   auto rbox2_num = rbox2.shape()[0];
 
 
-  auto output = paddle::Tensor(paddle::PlaceType::kGPU, {rbox1_num, rbox2_num});
+  auto output =
+      paddle::empty({rbox1_num, rbox2_num}, rbox1.dtype(), paddle::GPUPlace());
 
 
   const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X);
   const int blocks_x = CeilDiv(rbox1_num, BLOCK_DIM_X);
   const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y);
   const int blocks_y = CeilDiv(rbox2_num, BLOCK_DIM_Y);
@@ -107,7 +102,7 @@ std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor &rbox1,
       rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
       rbox1.type(), "rbox_iou_cuda_kernel", ([&] {
         rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>(
         rbox_iou_cuda_kernel<data_t><<<blocks, threads, 0, rbox1.stream()>>>(
             rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
             rbox1_num, rbox2_num, rbox1.data<data_t>(), rbox2.data<data_t>(),
-            output.mutable_data<data_t>());
+            output.data<data_t>());
       }));
       }));
 
 
   return {output};
   return {output};

+ 0 - 97
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.cc

@@ -1,97 +0,0 @@
-//   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// The code is based on https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
-
-#include "rbox_iou_op.h"
-#include "paddle/extension.h"
-
-
-template <typename T>
-void rbox_iou_cpu_kernel(
-    const int rbox1_num,
-    const int rbox2_num,
-    const T* rbox1_data_ptr,
-    const T* rbox2_data_ptr,
-    T* output_data_ptr) {
-
-    int i, j;
-    for (i = 0; i < rbox1_num; i++) {
-        for (j = 0; j < rbox2_num; j++) {
-		int offset = i * rbox2_num + j;
-		output_data_ptr[offset] = rbox_iou_single<T>(rbox1_data_ptr + i * 5, rbox2_data_ptr + j * 5);
-        }
-    }
-}
-
-
-#define CHECK_INPUT_CPU(x) PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
-
-std::vector<paddle::Tensor> RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
-    CHECK_INPUT_CPU(rbox1);
-    CHECK_INPUT_CPU(rbox2);
-
-    auto rbox1_num = rbox1.shape()[0];
-    auto rbox2_num = rbox2.shape()[0];
-
-    auto output = paddle::Tensor(paddle::PlaceType::kCPU, {rbox1_num, rbox2_num});
-
-    PD_DISPATCH_FLOATING_TYPES(
-        rbox1.type(),
-        "rbox_iou_cpu_kernel",
-        ([&] {
-            rbox_iou_cpu_kernel<data_t>(
-                rbox1_num,
-                rbox2_num,
-                rbox1.data<data_t>(),
-                rbox2.data<data_t>(),
-                output.mutable_data<data_t>());
-        }));
-    
-    return {output};
-}
-
-
-#ifdef PADDLE_WITH_CUDA
-std::vector<paddle::Tensor> RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
-#endif
-
-
-#define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
-
-std::vector<paddle::Tensor> RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
-    CHECK_INPUT_SAME(rbox1, rbox2);
-    if (rbox1.place() == paddle::PlaceType::kCPU) {
-        return RboxIouCPUForward(rbox1, rbox2);
-#ifdef PADDLE_WITH_CUDA
-    } else if (rbox1.place() == paddle::PlaceType::kGPU) {
-        return RboxIouCUDAForward(rbox1, rbox2);
-#endif
-    }
-}
-
-std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> rbox1_shape, std::vector<int64_t> rbox2_shape) {
-    return {{rbox1_shape[0], rbox2_shape[0]}};
-}
-
-std::vector<paddle::DataType> InferDtype(paddle::DataType t1, paddle::DataType t2) {
-    return {t1};
-}
-
-PD_BUILD_OP(rbox_iou)
-    .Inputs({"RBOX1", "RBOX2"})
-    .Outputs({"Output"})
-    .SetKernelFn(PD_KERNEL(RboxIouForward))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));

+ 12 - 4
paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_op.h → paddlers/models/ppdet/ext_op/csrc/rbox_iou/rbox_iou_utils.h

@@ -13,7 +13,7 @@
 // limitations under the License.
 // limitations under the License.
 //
 //
 // The code is based on
 // The code is based on
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/box_iou_rotated
+// https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/box_iou_rotated/
 
 
 #pragma once
 #pragma once
 
 
@@ -336,13 +336,21 @@ HOST_DEVICE_INLINE T rbox_iou_single(T const *const box1_raw,
   box2.h = box2_raw[3];
   box2.h = box2_raw[3];
   box2.a = box2_raw[4];
   box2.a = box2_raw[4];
 
 
-  const T area1 = box1.w * box1.h;
-  const T area2 = box2.w * box2.h;
-  if (area1 < 1e-14 || area2 < 1e-14) {
+  if (box1.w < 1e-2 || box1.h < 1e-2 || box2.w < 1e-2 || box2.h < 1e-2) {
     return 0.f;
     return 0.f;
   }
   }
+  const T area1 = box1.w * box1.h;
+  const T area2 = box2.w * box2.h;
 
 
   const T intersection = rboxes_intersection<T>(box1, box2);
   const T intersection = rboxes_intersection<T>(box1, box2);
   const T iou = intersection / (area1 + area2 - intersection);
   const T iou = intersection / (area1 + area2 - intersection);
   return iou;
   return iou;
 }
 }
+
+/**
+   Computes ceil(a / b)
+*/
+
+HOST_DEVICE inline int CeilDiv(const int a, const int b) {
+  return (a + b - 1) / b;
+}
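For reference, a NumPy/Shapely sketch of the same rotated-IoU definition, including the new degenerate-box guard (any width or height below 1e-2 yields IoU 0). Shapely here only stands in for the analytic polygon clipping done in rboxes_intersection, and the radian angle convention is an assumption for the illustration:

import numpy as np
from shapely.geometry import Polygon

def rbox_to_corners(box):
    # box layout assumed as [cx, cy, w, h, angle (radians)]
    cx, cy, w, h, a = box
    c, s = np.cos(a), np.sin(a)
    dx = np.array([-w, w, w, -w]) / 2.0   # corner offsets before rotation
    dy = np.array([-h, -h, h, h]) / 2.0
    xs = cx + c * dx - s * dy
    ys = cy + s * dx + c * dy
    return np.stack([xs, ys], axis=1)

def rbox_iou_single_ref(box1, box2):
    # mirrors the degenerate-box guard above: tiny boxes give IoU 0
    if min(box1[2], box1[3], box2[2], box2[3]) < 1e-2:
        return 0.0
    p1, p2 = Polygon(rbox_to_corners(box1)), Polygon(rbox_to_corners(box2))
    inter = p1.intersection(p2).area
    return inter / (p1.area + p2.area - inter)

print(rbox_iou_single_ref([0, 0, 2, 2, 0], [0, 0, 2, 2, np.pi / 4]))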

+ 1 - 1
paddlers/models/ppdet/hash.txt

@@ -1 +1 @@
-e3f8dd16bffca04060ec1edc388c5a618e15bbf8
+00fe2a1c35603b6fb37b73265aecf6282e5e2ad4

+ 2 - 1
paddlers/models/ppdet/metrics/__init__.py

@@ -17,6 +17,7 @@ from . import keypoint_metrics
 
 
 from .metrics import *
 from .metrics import *
 from .keypoint_metrics import *
 from .keypoint_metrics import *
+from .pose3d_metrics import *
 
 
 __all__ = metrics.__all__ + keypoint_metrics.__all__
 __all__ = metrics.__all__ + keypoint_metrics.__all__
 
 
@@ -26,4 +27,4 @@ __all__ = metrics.__all__ + mot_metrics.__all__
 
 
 from . import mcmot_metrics
 from . import mcmot_metrics
 from .mcmot_metrics import *
 from .mcmot_metrics import *
-__all__ = metrics.__all__ + mcmot_metrics.__all__
+__all__ = metrics.__all__ + mcmot_metrics.__all__

+ 6 - 2
paddlers/models/ppdet/metrics/coco_utils.py

@@ -21,7 +21,7 @@ import sys
 import numpy as np
 import numpy as np
 import itertools
 import itertools
 
 
-from paddlers.models.ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res
+from paddlers.models.ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res, get_pose3d_res
 from paddlers.models.ppdet.metrics.map_utils import draw_pr_curve
 from paddlers.models.ppdet.metrics.map_utils import draw_pr_curve
 
 
 from paddlers.models.ppdet.utils.logger import setup_logger
 from paddlers.models.ppdet.utils.logger import setup_logger
@@ -64,6 +64,10 @@ def get_infer_results(outs, catid, bias=0):
         infer_res['keypoint'] = get_keypoint_res(outs, im_id)
         infer_res['keypoint'] = get_keypoint_res(outs, im_id)
         outs['bbox_num'] = [len(infer_res['keypoint'])]
         outs['bbox_num'] = [len(infer_res['keypoint'])]
 
 
+    if 'pose3d' in outs:
+        infer_res['pose3d'] = get_pose3d_res(outs, im_id)
+        outs['bbox_num'] = [len(infer_res['pose3d'])]
+
     return infer_res
     return infer_res
 
 
 
 
@@ -150,7 +154,7 @@ def cocoapi_eval(jsonfile,
         results_flatten = list(itertools.chain(*results_per_category))
         results_flatten = list(itertools.chain(*results_per_category))
         headers = ['category', 'AP'] * (num_columns // 2)
         headers = ['category', 'AP'] * (num_columns // 2)
         results_2d = itertools.zip_longest(
         results_2d = itertools.zip_longest(
-            *[results_flatten[i::num_columns] for i in range(num_columns)])
+            * [results_flatten[i::num_columns] for i in range(num_columns)])
         table_data = [headers]
         table_data = [headers]
         table_data += [result for result in results_2d]
         table_data += [result for result in results_2d]
         table = AsciiTable(table_data)
         table = AsciiTable(table_data)

+ 16 - 0
paddlers/models/ppdet/metrics/json_results.py

@@ -157,3 +157,19 @@ def get_keypoint_res(results, im_id):
             ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
             ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
             anns.append(ann)
             anns.append(ann)
     return anns
     return anns
+
+
+def get_pose3d_res(results, im_id):
+    anns = []
+    preds = results['pose3d']
+    for idx in range(im_id.shape[0]):
+        image_id = im_id[idx].item()
+        pose3d = preds[idx]
+        ann = {
+            'image_id': image_id,
+            'category_id': 1,  # XXX hard-coded category id
+            'pose3d': pose3d.tolist(),
+            'score': float(1.)
+        }
+        anns.append(ann)
+    return anns
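An illustration of the record format produced by get_pose3d_res; the joint count and image ids below are made up for the example:

import numpy as np
from paddlers.models.ppdet.metrics.json_results import get_pose3d_res

# Fake model outputs: two images, 14 joints with (x, y, z) each.
results = {'pose3d': np.zeros((2, 14, 3), dtype=np.float32)}
im_id = np.array([[101], [102]])

anns = get_pose3d_res(results, im_id)
# Each entry looks like:
# {'image_id': 101, 'category_id': 1, 'pose3d': [[0.0, 0.0, 0.0], ...], 'score': 1.0}
print(len(anns), anns[0]['image_id'])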

+ 1 - 1
paddlers/models/ppdet/metrics/metrics.py

@@ -350,7 +350,7 @@ class WiderFaceMetric(Metric):
 class RBoxMetric(Metric):
 class RBoxMetric(Metric):
     def __init__(self, anno_file, **kwargs):
     def __init__(self, anno_file, **kwargs):
         self.anno_file = anno_file
         self.anno_file = anno_file
-        self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
+        self.clsid2catid, self.catid2name = get_categories('RBOX', anno_file)
         self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
         self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
         self.classwise = kwargs.get('classwise', False)
         self.classwise = kwargs.get('classwise', False)
         self.output_eval = kwargs.get('output_eval', None)
         self.output_eval = kwargs.get('output_eval', None)

+ 200 - 0
paddlers/models/ppdet/metrics/pose3d_metrics.py

@@ -0,0 +1,200 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import paddle
+from paddle.distributed import ParallelEnv
+import os
+import json
+from collections import defaultdict, OrderedDict
+import numpy as np
+from paddlers.models.ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['Pose3DEval']
+
+
+class AverageMeter(object):
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+
+def mean_per_joint_position_error(pred, gt, has_3d_joints):
+    """ 
+    Compute mPJPE
+    """
+    gt = gt[has_3d_joints == 1]
+    gt = gt[:, :, :3]
+    pred = pred[has_3d_joints == 1]
+
+    with paddle.no_grad():
+        gt_pelvis = (gt[:, 2, :] + gt[:, 3, :]) / 2
+        gt = gt - gt_pelvis[:, None, :]
+        pred_pelvis = (pred[:, 2, :] + pred[:, 3, :]) / 2
+        pred = pred - pred_pelvis[:, None, :]
+        error = paddle.sqrt(((pred - gt)**2).sum(axis=-1)).mean(axis=-1).numpy()
+        return error
+
+
+def compute_similarity_transform(S1, S2):
+    """Computes a similarity transform (sR, t) that takes
+    a set of 3D points S1 (3 x N) closest to a set of 3D points S2,
+    where R is an 3x3 rotation matrix, t 3x1 translation, s scale.
+    i.e. solves the orthogonal Procrustes problem.
+    """
+    transposed = False
+    if S1.shape[0] != 3 and S1.shape[0] != 2:
+        S1 = S1.T
+        S2 = S2.T
+        transposed = True
+    assert (S2.shape[1] == S1.shape[1])
+
+    # 1. Remove mean.
+    mu1 = S1.mean(axis=1, keepdims=True)
+    mu2 = S2.mean(axis=1, keepdims=True)
+    X1 = S1 - mu1
+    X2 = S2 - mu2
+
+    # 2. Compute variance of X1 used for scale.
+    var1 = np.sum(X1**2)
+
+    # 3. The outer product of X1 and X2.
+    K = X1.dot(X2.T)
+
+    # 4. Solution that Maximizes trace(R'K) is R=U*V', where U, V are
+    # singular vectors of K.
+    U, s, Vh = np.linalg.svd(K)
+    V = Vh.T
+    # Construct Z that fixes the orientation of R to get det(R)=1.
+    Z = np.eye(U.shape[0])
+    Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T)))
+    # Construct R.
+    R = V.dot(Z.dot(U.T))
+
+    # 5. Recover scale.
+    scale = np.trace(R.dot(K)) / var1
+
+    # 6. Recover translation.
+    t = mu2 - scale * (R.dot(mu1))
+
+    # 7. Error:
+    S1_hat = scale * R.dot(S1) + t
+
+    if transposed:
+        S1_hat = S1_hat.T
+
+    return S1_hat
+
+
+def compute_similarity_transform_batch(S1, S2):
+    """Batched version of compute_similarity_transform."""
+    S1_hat = np.zeros_like(S1)
+    for i in range(S1.shape[0]):
+        S1_hat[i] = compute_similarity_transform(S1[i], S2[i])
+    return S1_hat
+
+
+def reconstruction_error(S1, S2, reduction='mean'):
+    """Do Procrustes alignment and compute reconstruction error."""
+    S1_hat = compute_similarity_transform_batch(S1, S2)
+    re = np.sqrt(((S1_hat - S2)**2).sum(axis=-1)).mean(axis=-1)
+    if reduction == 'mean':
+        re = re.mean()
+    elif reduction == 'sum':
+        re = re.sum()
+    return re
+
+
+def all_gather(data):
+    if paddle.distributed.get_world_size() == 1:
+        return data
+    vlist = []
+    paddle.distributed.all_gather(vlist, data)
+    data = paddle.concat(vlist, 0)
+    return data
+
+
+class Pose3DEval(object):
+    def __init__(self, output_eval, save_prediction_only=False):
+        super(Pose3DEval, self).__init__()
+        self.output_eval = output_eval
+        self.res_file = os.path.join(output_eval, "pose3d_results.json")
+        self.save_prediction_only = save_prediction_only
+        self.reset()
+
+    def reset(self):
+        self.PAmPJPE = AverageMeter()
+        self.mPJPE = AverageMeter()
+        self.eval_results = {}
+
+    def get_human36m_joints(self, input):
+        J24_TO_J14 = paddle.to_tensor(
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18])
+        J24_TO_J17 = paddle.to_tensor(
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 19])
+        return paddle.index_select(input, J24_TO_J14, axis=1)
+
+    def update(self, inputs, outputs):
+        gt_3d_joints = all_gather(inputs['joints_3d'].cuda(ParallelEnv()
+                                                           .local_rank))
+        has_3d_joints = all_gather(inputs['has_3d_joints'].cuda(ParallelEnv()
+                                                                .local_rank))
+        pred_3d_joints = all_gather(outputs['pose3d'])
+        if gt_3d_joints.shape[1] == 24:
+            gt_3d_joints = self.get_human36m_joints(gt_3d_joints)
+        if pred_3d_joints.shape[1] == 24:
+            pred_3d_joints = self.get_human36m_joints(pred_3d_joints)
+        mPJPE_val = mean_per_joint_position_error(pred_3d_joints, gt_3d_joints,
+                                                  has_3d_joints).mean()
+        PAmPJPE_val = reconstruction_error(
+            pred_3d_joints.numpy(),
+            gt_3d_joints[:, :, :3].numpy(),
+            reduction=None).mean()
+        count = int(np.sum(has_3d_joints.numpy()))
+        self.PAmPJPE.update(PAmPJPE_val * 1000., count)
+        self.mPJPE.update(mPJPE_val * 1000., count)
+
+    def accumulate(self):
+        if self.save_prediction_only:
+            logger.info(f'The pose3d result is saved to {self.res_file} '
+                        'and the model is not evaluated.')
+            return
+        self.eval_results['pose3d'] = [-self.mPJPE.avg, -self.PAmPJPE.avg]
+
+    def log(self):
+        if self.save_prediction_only:
+            return
+        stats_names = ['mPJPE', 'PAmPJPE']
+        num_values = len(stats_names)
+        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
+        print('|---' * (num_values + 1) + '|')
+
+        print(' '.join([
+            '| {:.3f}'.format(abs(value))
+            for value in self.eval_results['pose3d']
+        ]) + ' |')
+
+    def get_results(self):
+        return self.eval_results
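A quick sanity check (illustration only, not part of the patch) for the Procrustes-based PA-MPJPE above: aligning a scaled, rotated, and translated copy of a pose back onto the original should give near-zero reconstruction error. The joint count is arbitrary:

import numpy as np
from paddlers.models.ppdet.metrics.pose3d_metrics import reconstruction_error

rng = np.random.RandomState(0)
gt = rng.randn(1, 14, 3)                      # (batch, joints, xyz)
theta = np.pi / 6
R = np.array([[np.cos(theta), -np.sin(theta), 0],
              [np.sin(theta),  np.cos(theta), 0],
              [0, 0, 1]])
pred = 0.5 * gt @ R.T + np.array([1.0, -2.0, 0.3])  # similarity transform of gt

print(reconstruction_error(pred, gt))  # ~0, since PA-MPJPE removes s, R, t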

+ 2 - 0
paddlers/models/ppdet/modeling/__init__.py

@@ -30,6 +30,7 @@ from . import mot
 from . import transformers
 from . import transformers
 from . import assigners
 from . import assigners
 from . import rbox_utils
 from . import rbox_utils
+from . import ssod
 
 
 from .ops import *
 from .ops import *
 from .backbones import *
 from .backbones import *
@@ -45,3 +46,4 @@ from .mot import *
 from .transformers import *
 from .transformers import *
 from .assigners import *
 from .assigners import *
 from .rbox_utils import *
 from .rbox_utils import *
+from .ssod import *

+ 11 - 0
paddlers/models/ppdet/modeling/architectures/__init__.py

@@ -16,6 +16,7 @@ from . import meta_arch
 from . import faster_rcnn
 from . import faster_rcnn
 from . import mask_rcnn
 from . import mask_rcnn
 from . import yolo
 from . import yolo
+from . import ppyoloe
 from . import cascade_rcnn
 from . import cascade_rcnn
 from . import ssd
 from . import ssd
 from . import fcos
 from . import fcos
@@ -36,11 +37,16 @@ from . import tood
 from . import retinanet
 from . import retinanet
 from . import bytetrack
 from . import bytetrack
 from . import yolox
 from . import yolox
+from . import yolof
+from . import pose3d_metro
+from . import centertrack
+from . import queryinst
 
 
 from .meta_arch import *
 from .meta_arch import *
 from .faster_rcnn import *
 from .faster_rcnn import *
 from .mask_rcnn import *
 from .mask_rcnn import *
 from .yolo import *
 from .yolo import *
+from .ppyoloe import *
 from .cascade_rcnn import *
 from .cascade_rcnn import *
 from .ssd import *
 from .ssd import *
 from .fcos import *
 from .fcos import *
@@ -62,3 +68,8 @@ from .tood import *
 from .retinanet import *
 from .retinanet import *
 from .bytetrack import *
 from .bytetrack import *
 from .yolox import *
 from .yolox import *
+from .yolof import *
+from .pose3d_metro import *
+from .centertrack import *
+from .queryinst import *
+from .keypoint_petr import *

+ 35 - 9
paddlers/models/ppdet/modeling/architectures/blazeface.py

@@ -18,6 +18,8 @@ from __future__ import print_function
 
 
 from paddlers.models.ppdet.core.workspace import register, create
 from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from .meta_arch import BaseArch
+import paddle
+import paddle.nn.functional as F
 
 
 __all__ = ['BlazeFace']
 __all__ = ['BlazeFace']
 
 
@@ -74,18 +76,42 @@ class BlazeFace(BaseArch):
                                    self.inputs['gt_class'])
                                    self.inputs['gt_class'])
         else:
         else:
             preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
             preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
-            bbox, bbox_num = self.post_process(preds, anchors,
-                                               self.inputs['im_shape'],
-                                               self.inputs['scale_factor'])
-            return bbox, bbox_num
+            bbox, bbox_num, nms_keep_idx = self.post_process(
+                preds, anchors, self.inputs['im_shape'],
+                self.inputs['scale_factor'])
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before NMS, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                           """
+                preds_logits = preds[1]  # [[1xNumBBoxNumClass]]
+                extra_data['scores'] = F.softmax(paddle.concat(
+                    preds_logits, axis=1)).transpose([0, 2, 1])
+                extra_data['logits'] = paddle.concat(
+                    preds_logits, axis=1).transpose([0, 2, 1])
+                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
+                return bbox, bbox_num, extra_data
+            else:
+                return bbox, bbox_num
 
 
     def get_loss(self, ):
     def get_loss(self, ):
         return {"loss": self._forward()}
         return {"loss": self._forward()}
 
 
     def get_pred(self):
     def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {
-            "bbox": bbox_pred,
-            "bbox_num": bbox_num,
-        }
+        if self.use_extra_data:
+            bbox_pred, bbox_num, extra_data = self._forward()
+            output = {
+                "bbox": bbox_pred,
+                "bbox_num": bbox_num,
+                "extra_data": extra_data
+            }
+        else:
+            bbox_pred, bbox_num = self._forward()
+            output = {
+                "bbox": bbox_pred,
+                "bbox_num": bbox_num,
+            }
+
         return output
         return output

+ 1 - 1
paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py

@@ -108,7 +108,7 @@ class CascadeRCNN(BaseArch):
             im_shape = self.inputs['im_shape']
             im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']
             scale_factor = self.inputs['scale_factor']
 
 
-            bbox, bbox_num = self.bbox_post_process(
+            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                 preds, (refined_rois, rois_num), im_shape, scale_factor)
                 preds, (refined_rois, rois_num), im_shape, scale_factor)
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
             bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
             bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(

+ 11 - 16
paddlers/models/ppdet/modeling/architectures/centernet.py

@@ -78,30 +78,25 @@ class CenterNet(BaseArch):
 
 
     def get_pred(self):
     def get_pred(self):
         head_out = self._forward()
         head_out = self._forward()
+        bbox, bbox_num, bbox_inds, topk_clses, topk_ys, topk_xs = self.post_process(
+            head_out['heatmap'],
+            head_out['size'],
+            head_out['offset'],
+            im_shape=self.inputs['im_shape'],
+            scale_factor=self.inputs['scale_factor'])
+
         if self.for_mot:
         if self.for_mot:
-            bbox, bbox_inds, topk_clses = self.post_process(
-                head_out['heatmap'],
-                head_out['size'],
-                head_out['offset'],
-                im_shape=self.inputs['im_shape'],
-                scale_factor=self.inputs['scale_factor'])
             output = {
             output = {
                 "bbox": bbox,
                 "bbox": bbox,
+                "bbox_num": bbox_num,
                 "bbox_inds": bbox_inds,
                 "bbox_inds": bbox_inds,
                 "topk_clses": topk_clses,
                 "topk_clses": topk_clses,
+                "topk_ys": topk_ys,
+                "topk_xs": topk_xs,
                 "neck_feat": head_out['neck_feat']
                 "neck_feat": head_out['neck_feat']
             }
             }
         else:
         else:
-            bbox, bbox_num, _ = self.post_process(
-                head_out['heatmap'],
-                head_out['size'],
-                head_out['offset'],
-                im_shape=self.inputs['im_shape'],
-                scale_factor=self.inputs['scale_factor'])
-            output = {
-                "bbox": bbox,
-                "bbox_num": bbox_num,
-            }
+            output = {"bbox": bbox, "bbox_num": bbox_num}
         return output
         return output
 
 
     def get_loss(self):
     def get_loss(self):

+ 176 - 0
paddlers/models/ppdet/modeling/architectures/centertrack.py

@@ -0,0 +1,176 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import math
+import numpy as np
+import paddle
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+from ..keypoint_utils import affine_transform
+from paddlers.models.ppdet.data.transform.op_helper import gaussian_radius, gaussian2D, draw_umich_gaussian
+
+__all__ = ['CenterTrack']
+
+
+@register
+class CenterTrack(BaseArch):
+    """
+    CenterTrack network, see http://arxiv.org/abs/2004.01177
+
+    Args:
+        detector (object): 'CenterNet' instance
+        plugin_head (object): 'CenterTrackHead' instance
+        tracker (object): 'CenterTracker' instance
+    """
+    __category__ = 'architecture'
+    __shared__ = ['mot_metric']
+
+    def __init__(self,
+                 detector='CenterNet',
+                 plugin_head='CenterTrackHead',
+                 tracker='CenterTracker',
+                 mot_metric=False):
+        super(CenterTrack, self).__init__()
+        self.detector = detector
+        self.plugin_head = plugin_head
+        self.tracker = tracker
+        self.mot_metric = mot_metric
+        self.pre_image = None
+        self.deploy = False
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        detector = create(cfg['detector'])
+        detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape
+
+        kwargs = {'input_shape': detector_out_shape}
+        plugin_head = create(cfg['plugin_head'], **kwargs)
+        tracker = create(cfg['tracker'])
+
+        return {
+            'detector': detector,
+            'plugin_head': plugin_head,
+            'tracker': tracker,
+        }
+
+    def _forward(self):
+        if self.training:
+            det_outs = self.detector(self.inputs)
+            neck_feat = det_outs['neck_feat']
+
+            losses = {}
+            for k, v in det_outs.items():
+                if 'loss' not in k: continue
+                losses.update({k: v})
+
+            plugin_outs = self.plugin_head(neck_feat, self.inputs)
+            for k, v in plugin_outs.items():
+                if 'loss' not in k: continue
+                losses.update({k: v})
+
+            losses['loss'] = det_outs['det_loss'] + plugin_outs['plugin_loss']
+            return losses
+
+        else:
+            if not self.mot_metric:
+                # detection, support bs>=1
+                det_outs = self.detector(self.inputs)
+                return {
+                    'bbox': det_outs['bbox'],
+                    'bbox_num': det_outs['bbox_num']
+                }
+
+            else:
+                # MOT, only support bs=1
+                if not self.deploy:
+                    if self.pre_image is None:
+                        self.pre_image = self.inputs['image']
+                        # initializing tracker for the first frame
+                        self.tracker.init_track([])
+                    self.inputs['pre_image'] = self.pre_image
+                    self.pre_image = self.inputs[
+                        'image']  # Note: update for next image
+
+                    # render input heatmap from tracker status
+                    pre_hm = self.get_additional_inputs(
+                        self.tracker.tracks, self.inputs, with_hm=True)
+                    self.inputs['pre_hm'] = paddle.to_tensor(pre_hm)
+
+                # model inference
+                det_outs = self.detector(self.inputs)
+                neck_feat = det_outs['neck_feat']
+                result = self.plugin_head(
+                    neck_feat, self.inputs, det_outs['bbox'],
+                    det_outs['bbox_inds'], det_outs['topk_clses'],
+                    det_outs['topk_ys'], det_outs['topk_xs'])
+
+                if not self.deploy:
+                    # convert the cropped and 4x downsampled output coordinate system
+                    # back to the input image coordinate system
+                    result = self.plugin_head.centertrack_post_process(
+                        result, self.inputs, self.tracker.out_thresh)
+                return result
+
+    def get_pred(self):
+        return self._forward()
+
+    def get_loss(self):
+        return self._forward()
+
+    def reset_tracking(self):
+        self.tracker.reset()
+        self.pre_image = None
+
+    def get_additional_inputs(self, dets, meta, with_hm=True):
+        # Render input heatmap from previous trackings.
+        trans_input = meta['trans_input'][0].numpy()
+        inp_width, inp_height = int(meta['inp_width'][0]), int(meta[
+            'inp_height'][0])
+        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)
+
+        for det in dets:
+            if det['score'] < self.tracker.pre_thresh:
+                continue
+            bbox = affine_transform_bbox(det['bbox'], trans_input, inp_width,
+                                         inp_height)
+            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
+            if (h > 0 and w > 0):
+                radius = gaussian_radius(
+                    (math.ceil(h), math.ceil(w)), min_overlap=0.7)
+                radius = max(0, int(radius))
+                ct = np.array(
+                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
+                    dtype=np.float32)
+                ct_int = ct.astype(np.int32)
+                if with_hm:
+                    input_hm[0] = draw_umich_gaussian(input_hm[0], ct_int,
+                                                      radius)
+        if with_hm:
+            input_hm = input_hm[np.newaxis]
+        return input_hm
+
+
+def affine_transform_bbox(bbox, trans, width, height):
+    bbox = np.array(copy.deepcopy(bbox), dtype=np.float32)
+    bbox[:2] = affine_transform(bbox[:2], trans)
+    bbox[2:] = affine_transform(bbox[2:], trans)
+    bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
+    bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
+    return bbox

+ 13 - 5
paddlers/models/ppdet/modeling/architectures/detr.py

@@ -27,17 +27,20 @@ __all__ = ['DETR']
 class DETR(BaseArch):
 class DETR(BaseArch):
     __category__ = 'architecture'
     __category__ = 'architecture'
     __inject__ = ['post_process']
     __inject__ = ['post_process']
+    __shared__ = ['exclude_post_process']
 
 
     def __init__(self,
     def __init__(self,
                  backbone,
                  backbone,
                  transformer,
                  transformer,
                  detr_head,
                  detr_head,
-                 post_process='DETRBBoxPostProcess'):
+                 post_process='DETRBBoxPostProcess',
+                 exclude_post_process=False):
         super(DETR, self).__init__()
         super(DETR, self).__init__()
         self.backbone = backbone
         self.backbone = backbone
         self.transformer = transformer
         self.transformer = transformer
         self.detr_head = detr_head
         self.detr_head = detr_head
         self.post_process = post_process
         self.post_process = post_process
+        self.exclude_post_process = exclude_post_process
 
 
     @classmethod
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
     def from_config(cls, cfg, *args, **kwargs):
@@ -65,18 +68,23 @@ class DETR(BaseArch):
         body_feats = self.backbone(self.inputs)
         body_feats = self.backbone(self.inputs)
 
 
         # Transformer
         # Transformer
-        out_transformer = self.transformer(body_feats, self.inputs['pad_mask'])
+        pad_mask = self.inputs['pad_mask'] if self.training else None
+        out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
 
 
         # DETR Head
         # DETR Head
         if self.training:
         if self.training:
             return self.detr_head(out_transformer, body_feats, self.inputs)
             return self.detr_head(out_transformer, body_feats, self.inputs)
         else:
         else:
             preds = self.detr_head(out_transformer, body_feats)
             preds = self.detr_head(out_transformer, body_feats)
-            bbox, bbox_num = self.post_process(preds, self.inputs['im_shape'],
-                                               self.inputs['scale_factor'])
+            if self.exclude_post_process:
+                bboxes, logits, masks = preds
+                return bboxes, logits
+            else:
+                bbox, bbox_num = self.post_process(
+                    preds, self.inputs['im_shape'], self.inputs['scale_factor'])
             return bbox, bbox_num
             return bbox, bbox_num
 
 
-    def get_loss(self, ):
+    def get_loss(self):
         losses = self._forward()
         losses = self._forward()
         losses.update({
         losses.update({
             'loss':
             'loss':

+ 61 - 5
paddlers/models/ppdet/modeling/architectures/faster_rcnn.py

@@ -19,6 +19,7 @@ from __future__ import print_function
 import paddle
 import paddle
 from paddlers.models.ppdet.core.workspace import register, create
 from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from .meta_arch import BaseArch
+import numpy as np
 
 
 __all__ = ['FasterRCNN']
 __all__ = ['FasterRCNN']
 
 
@@ -51,6 +52,9 @@ class FasterRCNN(BaseArch):
         self.bbox_head = bbox_head
         self.bbox_head = bbox_head
         self.bbox_post_process = bbox_post_process
         self.bbox_post_process = bbox_post_process
 
 
+    def init_cot_head(self, relationship):
+        self.bbox_head.init_cot_head(relationship)
+
     @classmethod
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
     def from_config(cls, cfg, *args, **kwargs):
         backbone = create(cfg['backbone'])
         backbone = create(cfg['backbone'])
@@ -80,16 +84,29 @@ class FasterRCNN(BaseArch):
         else:
         else:
             rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
             rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
             preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
             preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
-
             im_shape = self.inputs['im_shape']
             im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']
             scale_factor = self.inputs['scale_factor']
-            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
+            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(preds, (rois, rois_num),
                                                     im_shape, scale_factor)
                                                     im_shape, scale_factor)
 
 
             # rescale the prediction back to origin image
             # rescale the prediction back to origin image
             bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
             bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                 bbox, bbox_num, im_shape, scale_factor)
                 bbox, bbox_num, im_shape, scale_factor)
-            return bbox_pred, bbox_num
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before NMS, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                """
+                extra_data['scores'] = preds[1]  # predict scores (probability)
+                # Todo: get logits output
+                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
+                return bbox_pred, bbox_num, extra_data
+            else:
+                return bbox_pred, bbox_num
+
 
 
     def get_loss(self, ):
     def get_loss(self, ):
         rpn_loss, bbox_loss = self._forward()
         rpn_loss, bbox_loss = self._forward()
@@ -101,6 +118,45 @@ class FasterRCNN(BaseArch):
         return loss
         return loss
 
 
     def get_pred(self):
     def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
+        if self.use_extra_data:
+            bbox_pred, bbox_num, extra_data = self._forward()
+            output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'extra_data': extra_data}
+        else:
+            bbox_pred, bbox_num = self._forward()
+            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
         return output
         return output
+
+    def target_bbox_forward(self, data):
+        body_feats = self.backbone(data)
+        if self.neck is not None:
+            body_feats = self.neck(body_feats)
+        rois = [roi for roi in data['gt_bbox']]
+        rois_num = paddle.concat([paddle.shape(roi)[0] for roi in rois])
+
+        preds, _ = self.bbox_head(body_feats, rois, rois_num, None, cot=True)
+        return preds
+
+    def relationship_learning(self, loader, num_classes_novel):
+        print('computing relationship')
+        train_labels_list = []
+        label_list = []
+
+        for step_id, data in enumerate(loader):
+            _, bbox_prob = self.target_bbox_forward(data)      
+            batch_size = data['im_id'].shape[0]
+            for i in range(batch_size):
+                num_bbox = data['gt_class'][i].shape[0]           
+                train_labels = data['gt_class'][i]
+                train_labels_list.append(train_labels.numpy().squeeze(1))
+            base_labels = bbox_prob.detach().numpy()[:,:-1]
+            label_list.append(base_labels)
+
+        labels = np.concatenate(train_labels_list, 0)
+        probabilities = np.concatenate(label_list, 0)
+        N_t = np.max(labels) + 1
+        conditional = []
+        for i in range(N_t):
+            this_class = probabilities[labels == i]
+            average = np.mean(this_class, axis=0, keepdims=True)
+            conditional.append(average)
+        return np.concatenate(conditional) 

+ 30 - 39
paddlers/models/ppdet/modeling/architectures/fcos.py

@@ -16,7 +16,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import division
 from __future__ import print_function
 from __future__ import print_function
 
 
-import paddle
 from paddlers.models.ppdet.core.workspace import register, create
 from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from .meta_arch import BaseArch
 
 
@@ -32,22 +31,25 @@ class FCOS(BaseArch):
         backbone (object): backbone instance
         backbone (object): backbone instance
         neck (object): 'FPN' instance
         neck (object): 'FPN' instance
         fcos_head (object): 'FCOSHead' instance
         fcos_head (object): 'FCOSHead' instance
-        post_process (object): 'FCOSPostProcess' instance
+        ssod_loss (object): 'SSODFCOSLoss' instance, only used for semi-det(ssod)
     """
     """
 
 
     __category__ = 'architecture'
     __category__ = 'architecture'
-    __inject__ = ['fcos_post_process']
+    __inject__ = ['ssod_loss']
 
 
     def __init__(self,
     def __init__(self,
-                 backbone,
-                 neck,
+                 backbone='ResNet',
+                 neck='FPN',
                  fcos_head='FCOSHead',
                  fcos_head='FCOSHead',
-                 fcos_post_process='FCOSPostProcess'):
+                 ssod_loss='SSODFCOSLoss'):
         super(FCOS, self).__init__()
         super(FCOS, self).__init__()
         self.backbone = backbone
         self.backbone = backbone
         self.neck = neck
         self.neck = neck
         self.fcos_head = fcos_head
         self.fcos_head = fcos_head
-        self.fcos_post_process = fcos_post_process
+
+        # for ssod, semi-det
+        self.is_teacher = False
+        self.ssod_loss = ssod_loss
 
 
     @classmethod
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
     def from_config(cls, cfg, *args, **kwargs):
@@ -68,38 +70,27 @@ class FCOS(BaseArch):
     def _forward(self):
     def _forward(self):
         body_feats = self.backbone(self.inputs)
         body_feats = self.backbone(self.inputs)
         fpn_feats = self.neck(body_feats)
         fpn_feats = self.neck(body_feats)
-        fcos_head_outs = self.fcos_head(fpn_feats, self.training)
-        if not self.training:
-            scale_factor = self.inputs['scale_factor']
-            bboxes = self.fcos_post_process(fcos_head_outs, scale_factor)
-            return bboxes
+
+        self.is_teacher = self.inputs.get('is_teacher', False)
+        if self.training or self.is_teacher:
+            losses = self.fcos_head(fpn_feats, self.inputs)
+            return losses
         else:
         else:
-            return fcos_head_outs
-
-    def get_loss(self, ):
-        loss = {}
-        tag_labels, tag_bboxes, tag_centerness = [], [], []
-        for i in range(len(self.fcos_head.fpn_stride)):
-            # labels, reg_target, centerness
-            k_lbl = 'labels{}'.format(i)
-            if k_lbl in self.inputs:
-                tag_labels.append(self.inputs[k_lbl])
-            k_box = 'reg_target{}'.format(i)
-            if k_box in self.inputs:
-                tag_bboxes.append(self.inputs[k_box])
-            k_ctn = 'centerness{}'.format(i)
-            if k_ctn in self.inputs:
-                tag_centerness.append(self.inputs[k_ctn])
-
-        fcos_head_outs = self._forward()
-        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
-                                            tag_bboxes, tag_centerness)
-        loss.update(loss_fcos)
-        total_loss = paddle.add_n(list(loss.values()))
-        loss.update({'loss': total_loss})
-        return loss
+            fcos_head_outs = self.fcos_head(fpn_feats)
+            bbox_pred, bbox_num = self.fcos_head.post_process(
+                fcos_head_outs, self.inputs['scale_factor'])
+            return {'bbox': bbox_pred, 'bbox_num': bbox_num}
+
+    def get_loss(self):
+        return self._forward()
 
 
     def get_pred(self):
     def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
-        return output
+        return self._forward()
+
+    def get_loss_keys(self):
+        return ['loss_cls', 'loss_box', 'loss_quality']
+
+    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
+        ssod_losses = self.ssod_loss(student_head_outs, teacher_head_outs,
+                                     train_cfg)
+        return ssod_losses

+ 207 - 6
paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py

@@ -24,8 +24,9 @@ from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 from .meta_arch import BaseArch
 from ..keypoint_utils import transform_preds
 from ..keypoint_utils import transform_preds
 from .. import layers as L
 from .. import layers as L
+from paddle.nn import functional as F
 
 
-__all__ = ['TopDownHRNet']
+__all__ = ['TopDownHRNet', 'TinyPose3DHRNet', 'TinyPose3DHRHeatmapNet']
 
 
 
 
 @register
 @register
@@ -45,7 +46,7 @@ class TopDownHRNet(BaseArch):
                  use_dark=True):
                  use_dark=True):
         """
         """
         HRNet network, see https://arxiv.org/abs/1902.09212
         HRNet network, see https://arxiv.org/abs/1902.09212
-
+ 
         Args:
         Args:
             backbone (nn.Layer): backbone instance
             backbone (nn.Layer): backbone instance
             post_process (object): `HRNetPostProcess` instance
             post_process (object): `HRNetPostProcess` instance
@@ -131,10 +132,10 @@ class HRNetPostProcess(object):
 
 
     def get_max_preds(self, heatmaps):
     def get_max_preds(self, heatmaps):
         '''get predictions from score maps
         '''get predictions from score maps
-
+ 
         Args:
         Args:
             heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
             heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
-
+ 
         Returns:
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
             maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
@@ -219,12 +220,12 @@ class HRNetPostProcess(object):
     def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
     def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
         """the highest heatvalue location with a quarter offset in the
         """the highest heatvalue location with a quarter offset in the
         direction from the highest response to the second highest response.
         direction from the highest response to the second highest response.
-
+ 
         Args:
         Args:
             heatmaps (numpy.ndarray): The predicted heatmaps
             heatmaps (numpy.ndarray): The predicted heatmaps
             center (numpy.ndarray): The boxes center
             center (numpy.ndarray): The boxes center
             scale (numpy.ndarray): The scale factor
             scale (numpy.ndarray): The scale factor
-
+ 
         Returns:
         Returns:
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
@@ -265,3 +266,203 @@ class HRNetPostProcess(object):
                     maxvals, axis=1)
                     maxvals, axis=1)
         ]]
         ]]
         return outputs
         return outputs
+
+
+class TinyPose3DPostProcess(object):
+    def __init__(self):
+        pass
+
+    def __call__(self, output, center, scale):
+        """
+        Args:
+            output (numpy.ndarray): numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
+            scale (numpy.ndarray): The scale factor
+        Returns:
+            preds: numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
+        """
+
+        preds = output.numpy().copy()
+
+        # Transform back
+        for i in range(output.shape[0]):  # batch_size
+            preds[i][:, 0] = preds[i][:, 0] * scale[i][0]
+            preds[i][:, 1] = preds[i][:, 1] * scale[i][1]
+
+        return preds
+
+
+def soft_argmax(heatmaps, joint_num):
+    dims = heatmaps.shape
+    depth_dim = (int)(dims[1] / joint_num)
+    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim * dims[2] * dims[3]))
+    heatmaps = F.softmax(heatmaps, 2)
+    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim, dims[2], dims[3]))
+
+    accu_x = heatmaps.sum(axis=(2, 3))
+    accu_y = heatmaps.sum(axis=(2, 4))
+    accu_z = heatmaps.sum(axis=(3, 4))
+
+    accu_x = accu_x * paddle.arange(1, 33)
+    accu_y = accu_y * paddle.arange(1, 33)
+    accu_z = accu_z * paddle.arange(1, 33)
+
+    accu_x = accu_x.sum(axis=2, keepdim=True) - 1
+    accu_y = accu_y.sum(axis=2, keepdim=True) - 1
+    accu_z = accu_z.sum(axis=2, keepdim=True) - 1
+
+    coord_out = paddle.concat(
+        (accu_x, accu_y, accu_z), axis=2)  # [batch_size, joint_num, 3]
+
+    return coord_out
+
+
+@register
+class TinyPose3DHRHeatmapNet(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['loss']
+
+    def __init__(
+            self,
+            width,  # 40, number of channels output by the backbone
+            num_joints,
+            backbone='HRNet',
+            loss='KeyPointRegressionMSELoss',
+            post_process=TinyPose3DPostProcess):
+        """
+        Args:
+            backbone (nn.Layer): backbone instance
+            post_process (object): post process instance
+        """
+        super(TinyPose3DHRHeatmapNet, self).__init__()
+
+        self.backbone = backbone
+        self.post_process = TinyPose3DPostProcess()
+        self.loss = loss
+        self.deploy = False
+        self.num_joints = num_joints
+
+        self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+
+        return {'backbone': backbone, }
+
+    def _forward(self):
+        feats = self.backbone(self.inputs)  # feats:[[batch_size, 40, 32, 24]]
+
+        hrnet_outputs = self.final_conv(feats[0])
+        res = soft_argmax(hrnet_outputs, self.num_joints)
+        return res
+
+    def get_loss(self):
+        pose3d = self._forward()
+        loss = self.loss(pose3d, None, self.inputs)
+        outputs = {'loss': loss}
+        return outputs
+
+    def get_pred(self):
+        res_lst = self._forward()
+        outputs = {'pose3d': res_lst}
+        return outputs
+
+    def flip_back(self, output_flipped, matched_parts):
+        assert output_flipped.ndim == 4,\
+                'output_flipped should be [batch_size, num_joints, height, width]'
+
+        output_flipped = output_flipped[:, :, :, ::-1]
+
+        for pair in matched_parts:
+            tmp = output_flipped[:, pair[0], :, :].copy()
+            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
+            output_flipped[:, pair[1], :, :] = tmp
+
+        return output_flipped
+
+
+@register
+class TinyPose3DHRNet(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['loss']
+
+    def __init__(self,
+                 width,
+                 num_joints,
+                 fc_channel=768,
+                 backbone='HRNet',
+                 loss='KeyPointRegressionMSELoss',
+                 post_process=TinyPose3DPostProcess):
+        """
+        Args:
+            backbone (nn.Layer): backbone instance
+            post_process (object): post process instance
+        """
+        super(TinyPose3DHRNet, self).__init__()
+        self.backbone = backbone
+        self.post_process = TinyPose3DPostProcess()
+        self.loss = loss
+        self.deploy = False
+        self.num_joints = num_joints
+
+        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
+
+        self.flatten = paddle.nn.Flatten(start_axis=2, stop_axis=3)
+        self.fc1 = paddle.nn.Linear(fc_channel, 256)
+        self.act1 = paddle.nn.ReLU()
+        self.fc2 = paddle.nn.Linear(256, 64)
+        self.act2 = paddle.nn.ReLU()
+        self.fc3 = paddle.nn.Linear(64, 3)
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+
+        return {'backbone': backbone, }
+
+    def _forward(self):
+        '''
+        self.inputs is a dict
+        '''
+        feats = self.backbone(
+            self.inputs)  # feats:[[batch_size, 40, width/4, height/4]]
+
+        hrnet_outputs = self.final_conv(
+            feats[0])  # hrnet_outputs: [batch_size, num_joints*32,32,32]
+
+        flatten_res = self.flatten(
+            hrnet_outputs)  # [batch_size,num_joints*32,32*32]
+
+        res = self.fc1(flatten_res)
+        res = self.act1(res)
+        res = self.fc2(res)
+        res = self.act2(res)
+        res = self.fc3(res)
+
+        if self.training:
+            return self.loss(res, self.inputs)
+        else:  # needed for model export
+            return res
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        res_lst = self._forward()
+        outputs = {'pose3d': res_lst}
+        return outputs
+
+    def flip_back(self, output_flipped, matched_parts):
+        assert output_flipped.ndim == 4,\
+                'output_flipped should be [batch_size, num_joints, height, width]'
+
+        output_flipped = output_flipped[:, :, :, ::-1]
+
+        for pair in matched_parts:
+            tmp = output_flipped[:, pair[0], :, :].copy()
+            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
+            output_flipped[:, pair[1], :, :] = tmp
+
+        return output_flipped
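The soft_argmax used by TinyPose3DHRHeatmapNet reads a coordinate as the expectation of the index under a softmax-normalized heatmap, which keeps the operation differentiable and sub-pixel accurate. A one-dimensional toy version of the idea (illustration only):

import numpy as np

heat = np.array([0.1, 0.2, 3.0, 0.4], dtype=np.float32)  # 1-D "heatmap"
prob = np.exp(heat) / np.exp(heat).sum()                  # softmax over positions
coord = (prob * np.arange(len(heat))).sum()               # expected index
print(coord)  # close to 2.0, the peak location, but returned as a float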

+ 217 - 0
paddlers/models/ppdet/modeling/architectures/keypoint_petr.py

@@ -0,0 +1,217 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at 
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and 
+# limitations under the License.
+"""
+This code is based on https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddlers.models.ppdet.core.workspace import register
+from .meta_arch import BaseArch
+from .. import layers as L
+
+__all__ = ['PETR']
+
+
+@register
+class PETR(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['backbone', 'neck', 'bbox_head']
+
+    def __init__(self,
+                 backbone='ResNet',
+                 neck='ChannelMapper',
+                 bbox_head='PETRHead'):
+        """
+        PETR, see https://openaccess.thecvf.com/content/CVPR2022/papers/Shi_End-to-End_Multi-Person_Pose_Estimation_With_Transformers_CVPR_2022_paper.pdf
+
+        Args:
+            backbone (nn.Layer): backbone instance
+            neck (nn.Layer): neck between backbone and head
+            bbox_head (nn.Layer): model output and loss
+        """
+        super(PETR, self).__init__()
+        self.backbone = backbone
+        if neck is not None:
+            self.with_neck = True
+        self.neck = neck
+        self.bbox_head = bbox_head
+        self.deploy = False
+
+    def extract_feat(self, img):
+        """Directly extract features from the backbone+neck."""
+        x = self.backbone(img)
+        if self.with_neck:
+            x = self.neck(x)
+        return x
+
+    def get_inputs(self):
+        img_metas = []
+        gt_bboxes = []
+        gt_labels = []
+        gt_keypoints = []
+        gt_areas = []
+        pad_gt_mask = self.inputs['pad_gt_mask'].astype("bool").squeeze(-1)
+        for idx, im_shape in enumerate(self.inputs['im_shape']):
+            img_meta = {
+                'img_shape': im_shape.astype("int32").tolist() + [1, ],
+                'batch_input_shape': self.inputs['image'].shape[-2:],
+                'image_name': self.inputs['image_file'][idx]
+            }
+            img_metas.append(img_meta)
+            if (not pad_gt_mask[idx].any()):
+                gt_keypoints.append(self.inputs['gt_joints'][idx][:1])
+                gt_labels.append(self.inputs['gt_class'][idx][:1])
+                gt_bboxes.append(self.inputs['gt_bbox'][idx][:1])
+                gt_areas.append(self.inputs['gt_areas'][idx][:1])
+                continue
+
+            gt_keypoints.append(self.inputs['gt_joints'][idx][pad_gt_mask[idx]])
+            gt_labels.append(self.inputs['gt_class'][idx][pad_gt_mask[idx]])
+            gt_bboxes.append(self.inputs['gt_bbox'][idx][pad_gt_mask[idx]])
+            gt_areas.append(self.inputs['gt_areas'][idx][pad_gt_mask[idx]])
+
+        return img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas
+
+    def get_loss(self):
+        """
+        Args:
+            img (Tensor): Input images of shape (N, C, H, W).
+                Typically these should be mean centered and std scaled.
+            img_metas (list[dict]): A List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                :class:`mmdet.datasets.pipelines.Collect`.
+            gt_bboxes (list[Tensor]): Each item is the ground-truth boxes of one
+                image, in [tl_x, tl_y, br_x, br_y] format.
+            gt_labels (list[Tensor]): Class indices corresponding to each box.
+            gt_keypoints (list[Tensor]): Each item is the ground-truth keypoints of
+                one image, in [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x,
+                p^{K}_y, p^{K}_v] format.
+            gt_areas (list[Tensor]): mask areas corresponding to each box.
+            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
+                boxes can be ignored when computing the loss.
+
+        Returns:
+            dict[str, Tensor]: A dictionary of loss components.
+        """
+
+        img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas = self.get_inputs(
+        )
+        gt_bboxes_ignore = getattr(self.inputs, 'gt_bboxes_ignore', None)
+
+        x = self.extract_feat(self.inputs)
+        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
+                                              gt_labels, gt_keypoints, gt_areas,
+                                              gt_bboxes_ignore)
+        loss = 0
+        for k, v in losses.items():
+            loss += v
+        losses['loss'] = loss
+
+        return losses
+
+    def get_pred_numpy(self):
+        """Used for computing network flops.
+        """
+
+        img = self.inputs['image']
+        batch_size, _, height, width = img.shape
+        dummy_img_metas = [
+            dict(
+                batch_input_shape=(height, width),
+                img_shape=(height, width, 3),
+                scale_factor=(1., 1., 1., 1.)) for _ in range(batch_size)
+        ]
+        x = self.extract_feat(img)
+        outs = self.bbox_head(x, img_metas=dummy_img_metas)
+        bbox_list = self.bbox_head.get_bboxes(
+            *outs, dummy_img_metas, rescale=True)
+        return bbox_list
+
+    def get_pred(self):
+        """
+        """
+        img = self.inputs['image']
+        batch_size, _, height, width = img.shape
+        img_metas = [
+            dict(
+                batch_input_shape=(height, width),
+                img_shape=(height, width, 3),
+                scale_factor=self.inputs['scale_factor'][i])
+            for i in range(batch_size)
+        ]
+        kptpred = self.simple_test(
+            self.inputs, img_metas=img_metas, rescale=True)
+        keypoints = kptpred[0][1][0]
+        bboxs = kptpred[0][0][0]
+        keypoints[..., 2] = bboxs[:, None, 4]
+        res_lst = [[keypoints, bboxs[:, 4]]]
+        outputs = {'keypoint': res_lst}
+        return outputs
+
+    def simple_test(self, inputs, img_metas, rescale=False):
+        """Test function without test time augmentation.
+
+        Args:
+            inputs (list[paddle.Tensor]): List of multiple images.
+            img_metas (list[dict]): List of image information.
+            rescale (bool, optional): Whether to rescale the results.
+                Defaults to False.
+
+        Returns:
+            list[list[np.ndarray]]: BBox and keypoint results of each image
+                and classes. The outer list corresponds to each image.
+                The inner list corresponds to each class.
+        """
+        batch_size = len(img_metas)
+        assert batch_size == 1, 'Currently only batch_size 1 for inference ' \
+            f'mode is supported. Found batch_size {batch_size}.'
+        feat = self.extract_feat(inputs)
+        results_list = self.bbox_head.simple_test(
+            feat, img_metas, rescale=rescale)
+
+        bbox_kpt_results = [
+            self.bbox_kpt2result(det_bboxes, det_labels, det_kpts,
+                                 self.bbox_head.num_classes)
+            for det_bboxes, det_labels, det_kpts in results_list
+        ]
+        return bbox_kpt_results
+
+    def bbox_kpt2result(self, bboxes, labels, kpts, num_classes):
+        """Convert detection results to a list of numpy arrays.
+
+        Args:
+            bboxes (paddle.Tensor | np.ndarray): shape (n, 5).
+            labels (paddle.Tensor | np.ndarray): shape (n, ).
+            kpts (paddle.Tensor | np.ndarray): shape (n, K, 3).
+            num_classes (int): class number, including background class.
+
+        Returns:
+            list(ndarray): bbox and keypoint results of each class.
+        """
+        if bboxes.shape[0] == 0:
+            return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)], \
+                [np.zeros((0, kpts.shape[1], 3), dtype=np.float32)
+                    for i in range(num_classes)]
+        else:
+            if isinstance(bboxes, paddle.Tensor):
+                bboxes = bboxes.numpy()
+                labels = labels.numpy()
+                kpts = kpts.numpy()
+            return [bboxes[labels == i, :] for i in range(num_classes)], \
+                [kpts[labels == i, :, :] for i in range(num_classes)]
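For reference, a toy sketch of the per-class grouping that bbox_kpt2result performs (dummy arrays; num_classes=2 is chosen only for illustration):

import numpy as np

bboxes = np.array([[0, 0, 10, 10, 0.9],
                   [5, 5, 20, 20, 0.8]], dtype=np.float32)  # (n, 5): x1, y1, x2, y2, score
labels = np.array([0, 1])                                   # (n,)
kpts = np.zeros((2, 17, 3), dtype=np.float32)               # (n, K, 3)

num_classes = 2
per_class_boxes = [bboxes[labels == i] for i in range(num_classes)]
per_class_kpts = [kpts[labels == i] for i in range(num_classes)]
# per_class_boxes[c] and per_class_kpts[c] now hold only the class-c detections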

+ 22 - 5
paddlers/models/ppdet/modeling/architectures/mask_rcnn.py

@@ -106,8 +106,8 @@ class MaskRCNN(BaseArch):
             im_shape = self.inputs['im_shape']
             scale_factor = self.inputs['scale_factor']

-            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
-                                                    im_shape, scale_factor)
+            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
+                preds, (rois, rois_num), im_shape, scale_factor)
             mask_out = self.mask_head(
                 body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)

@@ -117,7 +117,20 @@ class MaskRCNN(BaseArch):
             origin_shape = self.bbox_post_process.get_origin_shape()
             mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
                                                origin_shape)
-            return bbox_pred, bbox_num, mask_pred
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                """
+                extra_data['scores'] = preds[1]  # predict scores (probability)
+                # Todo: get logits output
+                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
+                return bbox_pred, bbox_num, mask_pred, extra_data
+            else:
+                return bbox_pred, bbox_num, mask_pred
 
 
     def get_loss(self, ):
         bbox_loss, mask_loss, rpn_loss = self._forward()
@@ -130,6 +143,10 @@ class MaskRCNN(BaseArch):
         return loss

     def get_pred(self):
-        bbox_pred, bbox_num, mask_pred = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
+        if self.use_extra_data:
+            bbox_pred, bbox_num, mask_pred, extra_data = self._forward()
+            output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred, 'extra_data': extra_data}
+        else:
+            bbox_pred, bbox_num, mask_pred = self._forward()
+            output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
         return output

+ 2 - 1
paddlers/models/ppdet/modeling/architectures/meta_arch.py

@@ -15,11 +15,12 @@ __all__ = ['BaseArch']
 
 
 @register
 class BaseArch(nn.Layer):
-    def __init__(self, data_format='NCHW'):
+    def __init__(self, data_format='NCHW', use_extra_data=False):
         super(BaseArch, self).__init__()
         self.data_format = data_format
         self.inputs = {}
         self.fuse_norm = False
+        self.use_extra_data = use_extra_data

     def load_meanstd(self, cfg_transform):
         scale = 1.

+ 114 - 0
paddlers/models/ppdet/modeling/architectures/pose3d_metro.py

@@ -0,0 +1,114 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at 
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and 
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+from .. import layers as L
+
+__all__ = ['METRO_Body']
+
+
+def orthographic_projection(X, camera):
+    """Perform orthographic projection of 3D points X using the camera parameters
+    Args:
+        X: size = [B, N, 3]
+        camera: size = [B, 3]
+    Returns:
+        Projected 2D points -- size = [B, N, 2]
+    """
+    camera = camera.reshape((-1, 1, 3))
+    X_trans = X[:, :, :2] + camera[:, :, 1:]
+    shape = paddle.shape(X_trans)
+    X_2d = (camera[:, :, 0] * X_trans.reshape((shape[0], -1))).reshape(shape)
+    return X_2d
+
+
+@register
+class METRO_Body(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['loss']
+
+    def __init__(
+            self,
+            num_joints,
+            backbone='HRNet',
+            trans_encoder='',
+            loss='Pose3DLoss', ):
+        """
+        Modified from METRO network, see https://arxiv.org/abs/2012.09760
+
+        Args:
+            backbone (nn.Layer): backbone instance
+        """
+        super(METRO_Body, self).__init__()
+        self.num_joints = num_joints
+        self.backbone = backbone
+        self.loss = loss
+        self.deploy = False
+
+        self.trans_encoder = trans_encoder
+        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
+        self.cam_param_fc = paddle.nn.Linear(3, 2)
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+        trans_encoder = create(cfg['trans_encoder'])
+
+        return {'backbone': backbone, 'trans_encoder': trans_encoder}
+
+    def _forward(self):
+        batch_size = self.inputs['image'].shape[0]
+
+        image_feat = self.backbone(self.inputs)
+        image_feat_flatten = image_feat.reshape((batch_size, 2048, 49))
+        image_feat_flatten = image_feat_flatten.transpose(perm=(0, 2, 1))
+        # and apply a conv layer to learn image token for each 3d joint/vertex position
+        features = self.conv_learn_tokens(image_feat_flatten)  # (B, J, C)
+
+        if self.training:
+            # apply mask vertex/joint modeling
+            # meta_masks is a tensor of all the masks, randomly generated in dataloader
+            # we pre-define a [MASK] token, which is a floating-value vector with 0.01s
+            meta_masks = self.inputs['mjm_mask'].expand((-1, -1, 2048))
+            constant_tensor = paddle.ones_like(features) * 0.01
+            features = features * meta_masks + constant_tensor * (1 - meta_masks
+                                                                  )
+        pred_out = self.trans_encoder(features)
+
+        pred_3d_joints = pred_out[:, :self.num_joints, :]
+        cam_features = pred_out[:, self.num_joints:, :]
+
+        # learn camera parameters
+        pred_2d_joints = self.cam_param_fc(cam_features)
+        return pred_3d_joints, pred_2d_joints
+
+    def get_loss(self):
+        preds_3d, preds_2d = self._forward()
+        loss = self.loss(preds_3d, preds_2d, self.inputs)
+        output = {'loss': loss}
+        return output
+
+    def get_pred(self):
+        preds_3d, preds_2d = self._forward()
+        outputs = {'pose3d': preds_3d, 'pose2d': preds_2d}
+        return outputs
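A quick standalone check of the orthographic_projection helper defined at the top of this file (toy values; camera is [scale, tx, ty] per sample). The reshape round-trip in the original is just a broadcast-friendly way of doing the same multiplication.

import paddle

X = paddle.to_tensor([[[1.0, 2.0, 5.0], [0.0, -1.0, 3.0]]])  # [B=1, N=2, 3]
camera = paddle.to_tensor([[2.0, 0.5, -0.5]])                # [B=1, 3]: scale, tx, ty

cam = camera.reshape((-1, 1, 3))
X_trans = X[:, :, :2] + cam[:, :, 1:]  # translate the x, y coordinates
X_2d = cam[:, :, 0] * X_trans          # scale -> [[[3.0, 3.0], [1.0, -3.0]]]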

+ 260 - 0
paddlers/models/ppdet/modeling/architectures/ppyoloe.py

@@ -0,0 +1,260 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import paddle
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['PPYOLOE', 'PPYOLOEWithAuxHead']
+# PP-YOLOE and PP-YOLOE+ are recommended to use this architecture, especially when using distillation or an aux head
+# PP-YOLOE and PP-YOLOE+ can also use the same architecture as YOLOv3 in yolo.py when not using distillation or an aux head
+
+
+@register
+class PPYOLOE(BaseArch):
+    """
+    PPYOLOE network, see https://arxiv.org/abs/2203.16250
+
+    Args:
+        backbone (nn.Layer): backbone instance
+        neck (nn.Layer): neck instance
+        yolo_head (nn.Layer): anchor_head instance
+        post_process (object): `BBoxPostProcess` instance
+        ssod_loss (object): 'SSODPPYOLOELoss' instance, only used for semi-supervised detection (SSOD)
+        for_distill (bool): whether the model is used for distillation
+        feat_distill_place (str): which feature to distill, 'backbone_feats' or 'neck_feats'
+        for_mot (bool): whether to return other features for multi-object tracking
+            models, default False in pure object detection models.
+    """
+
+    __category__ = 'architecture'
+    __shared__ = ['for_distill']
+    __inject__ = ['post_process', 'ssod_loss']
+
+    def __init__(self,
+                 backbone='CSPResNet',
+                 neck='CustomCSPPAN',
+                 yolo_head='PPYOLOEHead',
+                 post_process='BBoxPostProcess',
+                 ssod_loss='SSODPPYOLOELoss',
+                 for_distill=False,
+                 feat_distill_place='neck_feats',
+                 for_mot=False):
+        super(PPYOLOE, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.yolo_head = yolo_head
+        self.post_process = post_process
+        self.for_mot = for_mot
+
+        # for ssod, semi-det
+        self.is_teacher = False
+        self.ssod_loss = ssod_loss
+
+        # distill
+        self.for_distill = for_distill
+        self.feat_distill_place = feat_distill_place
+        if for_distill:
+            assert feat_distill_place in ['backbone_feats', 'neck_feats']
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        backbone = create(cfg['backbone'])
+
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+
+        kwargs = {'input_shape': neck.out_shape}
+        yolo_head = create(cfg['yolo_head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            "yolo_head": yolo_head,
+        }
+
+    def _forward(self):
+        body_feats = self.backbone(self.inputs)
+        neck_feats = self.neck(body_feats, self.for_mot)
+
+        self.is_teacher = self.inputs.get('is_teacher', False)  # for semi-det
+        if self.training or self.is_teacher:
+            yolo_losses = self.yolo_head(neck_feats, self.inputs)
+
+            if self.for_distill:
+                if self.feat_distill_place == 'backbone_feats':
+                    self.yolo_head.distill_pairs['backbone_feats'] = body_feats
+                elif self.feat_distill_place == 'neck_feats':
+                    self.yolo_head.distill_pairs['neck_feats'] = neck_feats
+                else:
+                    raise ValueError
+            return yolo_losses
+        else:
+
+            yolo_head_outs = self.yolo_head(neck_feats)
+
+            if self.post_process is not None:
+                bbox, bbox_num, nms_keep_idx = self.post_process(
+                    yolo_head_outs, self.yolo_head.mask_anchors,
+                    self.inputs['im_shape'], self.inputs['scale_factor'])
+
+            else:
+                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
+                    yolo_head_outs, self.inputs['scale_factor'])
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                           """
+                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
+                extra_data['nms_keep_idx'] = nms_keep_idx
+                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
+            else:
+                output = {'bbox': bbox, 'bbox_num': bbox_num}
+
+            return output
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()
+
+    def get_loss_keys(self):
+        return ['loss_cls', 'loss_iou', 'loss_dfl', 'loss_contrast']
+
+    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
+        ssod_losses = self.ssod_loss(student_head_outs, teacher_head_outs,
+                                     train_cfg)
+        return ssod_losses
+
+
+@register
+class PPYOLOEWithAuxHead(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['post_process']
+
+    def __init__(self,
+                 backbone='CSPResNet',
+                 neck='CustomCSPPAN',
+                 yolo_head='PPYOLOEHead',
+                 aux_head='SimpleConvHead',
+                 post_process='BBoxPostProcess',
+                 for_mot=False,
+                 detach_epoch=5):
+        """
+        PPYOLOE network, see https://arxiv.org/abs/2203.16250
+
+        Args:
+            backbone (nn.Layer): backbone instance
+            neck (nn.Layer): neck instance
+            yolo_head (nn.Layer): anchor_head instance
+            post_process (object): `BBoxPostProcess` instance
+            for_mot (bool): whether to return other features for multi-object tracking
+                models, default False in pure object detection models.
+        """
+        super(PPYOLOEWithAuxHead, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.aux_neck = copy.deepcopy(self.neck)
+
+        self.yolo_head = yolo_head
+        self.aux_head = aux_head
+        self.post_process = post_process
+        self.for_mot = for_mot
+        self.detach_epoch = detach_epoch
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+
+        # fpn
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+        aux_neck = copy.deepcopy(neck)
+
+        # head
+        kwargs = {'input_shape': neck.out_shape}
+        yolo_head = create(cfg['yolo_head'], **kwargs)
+        aux_head = create(cfg['aux_head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            "yolo_head": yolo_head,
+            'aux_head': aux_head,
+        }
+
+    def _forward(self):
+        body_feats = self.backbone(self.inputs)
+        neck_feats = self.neck(body_feats, self.for_mot)
+
+        if self.training:
+            if self.inputs['epoch_id'] >= self.detach_epoch:
+                aux_neck_feats = self.aux_neck([f.detach() for f in body_feats])
+                dual_neck_feats = (paddle.concat(
+                    [f.detach(), aux_f], axis=1) for f, aux_f in
+                                   zip(neck_feats, aux_neck_feats))
+            else:
+                aux_neck_feats = self.aux_neck(body_feats)
+                dual_neck_feats = (paddle.concat(
+                    [f, aux_f], axis=1) for f, aux_f in
+                                   zip(neck_feats, aux_neck_feats))
+            aux_cls_scores, aux_bbox_preds = self.aux_head(dual_neck_feats)
+            loss = self.yolo_head(
+                neck_feats,
+                self.inputs,
+                aux_pred=[aux_cls_scores, aux_bbox_preds])
+            return loss
+        else:
+            yolo_head_outs = self.yolo_head(neck_feats)
+
+            if self.post_process is not None:
+                bbox, bbox_num, nms_keep_idx = self.post_process(
+                    yolo_head_outs, self.yolo_head.mask_anchors,
+                    self.inputs['im_shape'], self.inputs['scale_factor'])
+            else:
+                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
+                    yolo_head_outs, self.inputs['scale_factor'])
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                           """
+                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
+                # Todo: get logits output
+                extra_data['nms_keep_idx'] = nms_keep_idx
+                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
+            else:
+                output = {'bbox': bbox, 'bbox_num': bbox_num}
+
+            return output
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()
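The use_extra_data switch introduced on BaseArch is consumed the same way across the detectors touched in this commit; below is a toy sketch (the score layout and index values are assumptions, not taken from a specific head) of relating the recorded pre-NMS scores to nms_keep_idx.

import paddle

# pretend extra_data from get_pred(): per-class scores before NMS plus the kept indices
scores = paddle.rand([1, 80, 1000])           # [batch, num_classes, num_anchors] (assumed layout)
nms_keep_idx = paddle.to_tensor([3, 17, 42])  # toy indices of boxes kept by NMS

kept_scores = paddle.index_select(scores, nms_keep_idx, axis=2)  # [1, 80, 3]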

+ 104 - 0
paddlers/models/ppdet/modeling/architectures/queryinst.py

@@ -0,0 +1,104 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['QueryInst']
+
+
+@register
+class QueryInst(BaseArch):
+    __category__ = 'architecture'
+    __inject__ = ['post_process']
+
+    def __init__(self,
+                 backbone,
+                 neck,
+                 rpn_head,
+                 roi_head,
+                 post_process='SparsePostProcess'):
+        super(QueryInst, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.rpn_head = rpn_head
+        self.roi_head = roi_head
+        self.post_process = post_process
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        backbone = create(cfg['backbone'])
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+
+        kwargs = {'input_shape': neck.out_shape}
+        rpn_head = create(cfg['rpn_head'], **kwargs)
+        roi_head = create(cfg['roi_head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            'rpn_head': rpn_head,
+            "roi_head": roi_head
+        }
+
+    def _forward(self, targets=None):
+        features = self.backbone(self.inputs)
+        features = self.neck(features)
+
+        proposal_bboxes, proposal_features = self.rpn_head(self.inputs[
+            'img_whwh'])
+        outputs = self.roi_head(features, proposal_bboxes, proposal_features,
+                                targets)
+
+        if self.training:
+            return outputs
+        else:
+            bbox_pred, bbox_num, mask_pred = self.post_process(
+                outputs['class_logits'], outputs['bbox_pred'],
+                self.inputs['scale_factor_whwh'], self.inputs['ori_shape'],
+                outputs['mask_logits'])
+            return bbox_pred, bbox_num, mask_pred
+
+    def get_loss(self):
+        targets = []
+        for i in range(len(self.inputs['img_whwh'])):
+            boxes = self.inputs['gt_bbox'][i]
+            labels = self.inputs['gt_class'][i].squeeze(-1)
+            img_whwh = self.inputs['img_whwh'][i]
+            if boxes.shape[0] != 0:
+                img_whwh_tgt = img_whwh.unsqueeze(0).tile([boxes.shape[0], 1])
+            else:
+                img_whwh_tgt = paddle.zeros_like(boxes)
+            gt_segm = self.inputs['gt_segm'][i].astype('float32')
+            targets.append({
+                'boxes': boxes,
+                'labels': labels,
+                'img_whwh': img_whwh,
+                'img_whwh_tgt': img_whwh_tgt,
+                'gt_segm': gt_segm
+            })
+        losses = self._forward(targets)
+        losses.update({'loss': sum(losses.values())})
+        return losses
+
+    def get_pred(self):
+        bbox_pred, bbox_num, mask_pred = self._forward()
+        return {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}

+ 18 - 2
paddlers/models/ppdet/modeling/architectures/retinanet.py

@@ -19,6 +19,7 @@ from __future__ import print_function
 from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
 import paddle
+import paddle.nn.functional as F

 __all__ = ['RetinaNet']

@@ -57,9 +58,24 @@ class RetinaNet(BaseArch):
             return self.head(neck_feats, self.inputs)
         else:
             head_outs = self.head(neck_feats)
-            bbox, bbox_num = self.head.post_process(
+            bbox, bbox_num, nms_keep_idx = self.head.post_process(
                 head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
-            return {'bbox': bbox, 'bbox_num': bbox_num}
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                           """
+                preds_logits = self.head.decode_cls_logits(head_outs[0])
+                preds_scores = F.sigmoid(preds_logits)
+                extra_data['logits'] = preds_logits
+                extra_data['scores'] = preds_scores
+                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
+                return {'bbox': bbox, 'bbox_num': bbox_num, "extra_data": extra_data}
+            else:
+                return {'bbox': bbox, 'bbox_num': bbox_num}
 
 
     def get_loss(self):
     def get_loss(self):
         return self._forward()
+ 3 - 3
paddlers/models/ppdet/modeling/architectures/sparse_rcnn.py

@@ -60,10 +60,10 @@ class SparseRCNN(BaseArch):
         head_outs = self.head(fpn_feats, self.inputs["img_whwh"])
         head_outs = self.head(fpn_feats, self.inputs["img_whwh"])
 
 
         if not self.training:
         if not self.training:
-            bboxes = self.postprocess(
+            bbox_pred, bbox_num = self.postprocess(
                 head_outs["pred_logits"], head_outs["pred_boxes"],
                 head_outs["pred_logits"], head_outs["pred_boxes"],
-                self.inputs["scale_factor_wh"], self.inputs["img_whwh"])
-            return bboxes
+                self.inputs["scale_factor_whwh"], self.inputs["ori_shape"])
+            return bbox_pred, bbox_num
         else:
         else:
             return head_outs
             return head_outs
 
 

+ 35 - 9
paddlers/models/ppdet/modeling/architectures/ssd.py

@@ -18,6 +18,8 @@ from __future__ import print_function
 
 
 from paddlers.models.ppdet.core.workspace import register, create
 from .meta_arch import BaseArch
+import paddle
+import paddle.nn.functional as F

 __all__ = ['SSD']

@@ -75,18 +77,42 @@ class SSD(BaseArch):
                                  self.inputs['gt_class'])
         else:
             preds, anchors = self.ssd_head(body_feats, self.inputs['image'])
-            bbox, bbox_num = self.post_process(preds, anchors,
-                                               self.inputs['im_shape'],
-                                               self.inputs['scale_factor'])
-            return bbox, bbox_num
+            bbox, bbox_num, nms_keep_idx = self.post_process(
+                preds, anchors, self.inputs['im_shape'],
+                self.inputs['scale_factor'])
+
+            if self.use_extra_data:
+                extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                """extra_data:{
+                            'scores': predict scores,
+                            'nms_keep_idx': bbox index before nms,
+                           }
+                           """
+                preds_logits = preds[1]  # [[1xNumBBoxNumClass]]
+                extra_data['scores'] = F.softmax(paddle.concat(
+                    preds_logits, axis=1)).transpose([0, 2, 1])
+                extra_data['logits'] = paddle.concat(
+                    preds_logits, axis=1).transpose([0, 2, 1])
+                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
+                return bbox, bbox_num, extra_data
+            else:
+                return bbox, bbox_num
 
 
     def get_loss(self, ):
         return {"loss": self._forward()}

     def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {
-            "bbox": bbox_pred,
-            "bbox_num": bbox_num,
-        }
+        if self.use_extra_data:
+            bbox_pred, bbox_num, extra_data = self._forward()
+            output = {
+                "bbox": bbox_pred,
+                "bbox_num": bbox_num,
+                "extra_data": extra_data
+            }
+        else:
+            bbox_pred, bbox_num = self._forward()
+            output = {
+                "bbox": bbox_pred,
+                "bbox_num": bbox_num,
+            }
         return output

+ 28 - 5
paddlers/models/ppdet/modeling/architectures/yolo.py

@@ -21,6 +21,8 @@ from .meta_arch import BaseArch
 from ..post_process import JDEBBoxPostProcess

 __all__ = ['YOLOv3']
+# YOLOv3, PP-YOLO, PP-YOLOv2, PP-YOLOE and PP-YOLOE+ use the same architecture as YOLOv3
+# PP-YOLOE and PP-YOLOE+ are recommended to use the PPYOLOE architecture in ppyoloe.py, especially when using distillation or an aux head


 @register
@@ -77,7 +79,10 @@ class YOLOv3(BaseArch):

     def _forward(self):
         body_feats = self.backbone(self.inputs)
-        neck_feats = self.neck(body_feats, self.for_mot)
+        if self.for_mot:
+            neck_feats = self.neck(body_feats, self.for_mot)
+        else:
+            neck_feats = self.neck(body_feats)

         if isinstance(neck_feats, dict):
             assert self.for_mot == True
@@ -96,6 +101,7 @@ class YOLOv3(BaseArch):
             yolo_head_outs = self.yolo_head(neck_feats)

             if self.for_mot:
+                # the detection part of JDE MOT model
                 boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                     yolo_head_outs, self.yolo_head.mask_anchors)
                 output = {
@@ -107,16 +113,33 @@ class YOLOv3(BaseArch):
                 }
             else:
                 if self.return_idx:
-                    _, bbox, bbox_num, _ = self.post_process(
+                    # the detection part of JDE MOT model
+                    _, bbox, bbox_num, nms_keep_idx = self.post_process(
                         yolo_head_outs, self.yolo_head.mask_anchors)
                 elif self.post_process is not None:
-                    bbox, bbox_num = self.post_process(
+                    # anchor based YOLOs: YOLOv3,PP-YOLO,PP-YOLOv2 use mask_anchors
+                    bbox, bbox_num, nms_keep_idx = self.post_process(
                         yolo_head_outs, self.yolo_head.mask_anchors,
                         self.inputs['im_shape'], self.inputs['scale_factor'])
                 else:
-                    bbox, bbox_num = self.yolo_head.post_process(
+                    # anchor free YOLOs: PP-YOLOE, PP-YOLOE+
+                    bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                         yolo_head_outs, self.inputs['scale_factor'])
-                output = {'bbox': bbox, 'bbox_num': bbox_num}
+
+                if self.use_extra_data:
+                    extra_data = {}  # record the bbox output before nms, such as scores and nms_keep_idx
+                    """extra_data:{
+                                'scores': predict scores,
+                                'nms_keep_idx': bbox index before nms,
+                               }
+                    """
+                    extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
+                    # Todo: get logits output
+                    extra_data['nms_keep_idx'] = nms_keep_idx
+                    # Todo support for mask_anchors yolo
+                    output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
+                else:
+                    output = {'bbox': bbox, 'bbox_num': bbox_num}
 
 
             return output
 
 

+ 88 - 0
paddlers/models/ppdet/modeling/architectures/yolof.py

@@ -0,0 +1,88 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddlers.models.ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+
+__all__ = ['YOLOF']
+
+
+@register
+class YOLOF(BaseArch):
+    __category__ = 'architecture'
+
+    def __init__(self,
+                 backbone='ResNet',
+                 neck='DilatedEncoder',
+                 head='YOLOFHead',
+                 for_mot=False):
+        """
+        YOLOF network, see https://arxiv.org/abs/2103.09460
+
+        Args:
+            backbone (nn.Layer): backbone instance
+            neck (nn.Layer): DilatedEncoder instance
+            head (nn.Layer): YOLOFHead instance
+            for_mot (bool): whether to return other features for multi-object tracking
+                models, default False in pure object detection models.
+        """
+        super(YOLOF, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.head = head
+        self.for_mot = for_mot
+
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        # backbone
+        backbone = create(cfg['backbone'])
+
+        # fpn
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+
+        # head
+        kwargs = {'input_shape': neck.out_shape}
+        head = create(cfg['head'], **kwargs)
+
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            "head": head,
+        }
+
+    def _forward(self):
+        body_feats = self.backbone(self.inputs)
+        neck_feats = self.neck(body_feats, self.for_mot)
+
+        if self.training:
+            yolo_losses = self.head(neck_feats, self.inputs)
+            return yolo_losses
+        else:
+            yolo_head_outs = self.head(neck_feats)
+            bbox, bbox_num = self.head.post_process(yolo_head_outs,
+                                                    self.inputs['im_shape'],
+                                                    self.inputs['scale_factor'])
+            output = {'bbox': bbox, 'bbox_num': bbox_num}
+            return output
+
+    def get_loss(self):
+        return self._forward()
+
+    def get_pred(self):
+        return self._forward()

+ 10 - 0
paddlers/models/ppdet/modeling/assigners/__init__.py

@@ -17,9 +17,19 @@ from . import task_aligned_assigner
 from . import atss_assigner
 from . import simota_assigner
 from . import max_iou_assigner
+from . import fcosr_assigner
+from . import rotated_task_aligned_assigner
+from . import task_aligned_assigner_cr
+from . import uniform_assigner

 from .utils import *
 from .task_aligned_assigner import *
 from .atss_assigner import *
 from .simota_assigner import *
 from .max_iou_assigner import *
+from .fcosr_assigner import *
+from .rotated_task_aligned_assigner import *
+from .task_aligned_assigner_cr import *
+from .uniform_assigner import *
+from .hungarian_assigner import *
+from .pose_utils import *

+ 16 - 6
paddlers/models/ppdet/modeling/assigners/atss_assigner.py

@@ -1,4 +1,4 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -41,12 +41,14 @@ class ATSSAssigner(nn.Layer):
                  topk=9,
                  num_classes=80,
                  force_gt_matching=False,
-                 eps=1e-9):
+                 eps=1e-9,
+                 sm_use=False):
         super(ATSSAssigner, self).__init__()
         self.topk = topk
         self.num_classes = num_classes
         self.force_gt_matching = force_gt_matching
         self.eps = eps
+        self.sm_use = sm_use
 
 
     def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
                              pad_gt_mask):
@@ -124,7 +126,8 @@ class ATSSAssigner(nn.Layer):
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_scores = paddle.zeros(
                 [batch_size, num_anchors, self.num_classes])
-            return assigned_labels, assigned_bboxes, assigned_scores
+            mask_positive = paddle.zeros([batch_size, 1, num_anchors])
+            return assigned_labels, assigned_bboxes, assigned_scores, mask_positive

         # 1. compute iou between gt and anchor bbox, [B, n, L]
         ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
@@ -154,7 +157,11 @@ class ATSSAssigner(nn.Layer):
                                   paddle.zeros_like(is_in_topk))

         # 6. check the positive sample's center in gt, [B, n, L]
-        is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
+        if self.sm_use:
+            is_in_gts = check_points_inside_bboxes(
+                anchor_centers, gt_bboxes, sm_use=True)
+        else:
+            is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

         # select positive sample, [B, n, L]
         mask_positive = is_in_topk * is_in_gts * pad_gt_mask
@@ -165,7 +172,10 @@ class ATSSAssigner(nn.Layer):
         if mask_positive_sum.max() > 1:
             mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                 [1, num_max_boxes, 1])
-            is_max_iou = compute_max_iou_anchor(ious)
+            if self.sm_use:
+                is_max_iou = compute_max_iou_anchor(ious * mask_positive)
+            else:
+                is_max_iou = compute_max_iou_anchor(ious)
             mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                          mask_positive)
             mask_positive_sum = mask_positive.sum(axis=-2)
@@ -212,4 +222,4 @@ class ATSSAssigner(nn.Layer):
                                          paddle.zeros_like(gather_scores))
             assigned_scores *= gather_scores.unsqueeze(-1)
 
 
-        return assigned_labels, assigned_bboxes, assigned_scores
+        return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
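A toy paddle sketch of the sm_use tie-break added above: when one anchor is a positive candidate for several gts, the argmax-IoU gt is picked only among the masked candidates (compute_max_iou_anchor is approximated here by a plain max comparison, values are made up):

import paddle

ious = paddle.to_tensor([[[0.6, 0.2], [0.5, 0.4]]])       # [B=1, n=2 gts, L=2 anchors]
mask_positive = paddle.to_tensor([[[0., 1.], [1., 1.]]])  # candidate mask per gt/anchor

masked = ious * mask_positive                              # sm_use=True: restrict to candidates
is_max_iou = (masked == masked.max(axis=1, keepdim=True)).astype('float32')
# anchor 0 is resolved to gt 1 (0.5) even though its raw best IoU was gt 0 (0.6)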

+ 227 - 0
paddlers/models/ppdet/modeling/assigners/fcosr_assigner.py

@@ -0,0 +1,227 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.core.workspace import register
+from paddlers.models.ppdet.modeling.rbox_utils import box2corners, check_points_in_polys, paddle_gather
+
+__all__ = ['FCOSRAssigner']
+
+EPS = 1e-9
+
+
+@register
+class FCOSRAssigner(nn.Layer):
+    """ FCOSR Assigner, refer to https://arxiv.org/abs/2111.10780 for details
+
+    1. compute the normalized Gaussian distribution score and the refined Gaussian distribution score
+    2. following ellipse center sampling, sample the points whose normalized Gaussian distribution score is greater than the threshold
+    3. following multi-level sampling, assign the ground truth to the feature map that satisfies two conditions:
+        i). first, the ratio between the short edge of the target and the stride of the feature map is less than 2.
+        ii). second, the long edge of the minimum bounding rectangle of the target is larger than the acceptance range of the feature map
+    4. following fuzzy sample label assignment, the points satisfying 2 and 3 are assigned to the ground truth according to the Gaussian distribution score
+    """
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 num_classes=80,
+                 factor=12,
+                 threshold=0.23,
+                 boundary=[[-1, 128], [128, 320], [320, 10000]],
+                 score_type='iou'):
+        super(FCOSRAssigner, self).__init__()
+        self.num_classes = num_classes
+        self.factor = factor
+        self.threshold = threshold
+        self.boundary = [
+            paddle.to_tensor(
+                l, dtype=paddle.float32).reshape([1, 1, 2]) for l in boundary
+        ]
+        self.score_type = score_type
+
+    def get_gaussian_distribution_score(self, points, gt_rboxes, gt_polys):
+        # projecting points to coordinate system defined by each rbox
+        # [B, N, 4, 2] -> 4 * [B, N, 1, 2]
+        a, b, c, d = gt_polys.split(4, axis=2)
+        # [1, L, 2] -> [1, 1, L, 2]
+        points = points.unsqueeze(0)
+        ab = b - a
+        ad = d - a
+        # [B, N, 5] -> [B, N, 2], [B, N, 2], [B, N, 1]
+        xy, wh, angle = gt_rboxes.split([2, 2, 1], axis=-1)
+        # [B, N, 2] -> [B, N, 1, 2]
+        xy = xy.unsqueeze(2)
+        # vector of points to center [B, N, L, 2]
+        vec = points - xy
+        # <ab, vec> = |ab| * |vec| * cos(theta) [B, N, L]
+        vec_dot_ab = paddle.sum(vec * ab, axis=-1)
+        # <ad, vec> = |ad| * |vec| * cos(theta) [B, N, L]
+        vec_dot_ad = paddle.sum(vec * ad, axis=-1)
+        # norm_ab [B, N, L]
+        norm_ab = paddle.sum(ab * ab, axis=-1).sqrt()
+        # norm_ad [B, N, L]
+        norm_ad = paddle.sum(ad * ad, axis=-1).sqrt()
+        # min(h, w), [B, N, 1]
+        min_edge = paddle.min(wh, axis=-1, keepdim=True)
+        # delta_x, delta_y [B, N, L]
+        delta_x = vec_dot_ab.pow(2) / (norm_ab.pow(3) * min_edge + EPS)
+        delta_y = vec_dot_ad.pow(2) / (norm_ad.pow(3) * min_edge + EPS)
+        # score [B, N, L]
+        norm_score = paddle.exp(-0.5 * self.factor * (delta_x + delta_y))
+
+        # simplified calculation
+        sigma = min_edge / self.factor
+        refined_score = norm_score / (2 * np.pi * sigma + EPS)
+        return norm_score, refined_score
+
+    def get_rotated_inside_mask(self, points, gt_polys, scores):
+        inside_mask = check_points_in_polys(points, gt_polys)
+        center_mask = scores >= self.threshold
+        return (inside_mask & center_mask).cast(paddle.float32)
+
+    def get_inside_range_mask(self, points, gt_bboxes, gt_rboxes, stride_tensor,
+                              regress_range):
+        # [1, L, 2] -> [1, 1, L, 2]
+        points = points.unsqueeze(0)
+        # [B, n, 4] -> [B, n, 1, 4]
+        x1y1, x2y2 = gt_bboxes.unsqueeze(2).split(2, axis=-1)
+        # [B, n, L, 2]
+        lt = points - x1y1
+        rb = x2y2 - points
+        # [B, n, L, 4]
+        ltrb = paddle.concat([lt, rb], axis=-1)
+        # [B, n, L, 4] -> [B, n, L]
+        inside_mask = paddle.min(ltrb, axis=-1) > EPS
+        # regress_range [1, L, 2] -> [1, 1, L, 2]
+        regress_range = regress_range.unsqueeze(0)
+        # stride_tensor [1, L, 1] -> [1, 1, L]
+        stride_tensor = stride_tensor.transpose((0, 2, 1))
+        # fcos range
+        # [B, n, L, 4] -> [B, n, L]
+        ltrb_max = paddle.max(ltrb, axis=-1)
+        # [1, 1, L, 2] -> [1, 1, L]
+        low, high = regress_range[..., 0], regress_range[..., 1]
+        # [B, n, L]
+        regress_mask = (ltrb_max >= low) & (ltrb_max <= high)
+        # mask for rotated
+        # [B, n, 1]
+        min_edge = paddle.min(gt_rboxes[..., 2:4], axis=-1, keepdim=True)
+        # [B, n , L]
+        rotated_mask = ((min_edge / stride_tensor) < 2.0) & (ltrb_max > high)
+        mask = inside_mask & (regress_mask | rotated_mask)
+        return mask.cast(paddle.float32)
+
+    @paddle.no_grad()
+    def forward(self,
+                anchor_points,
+                stride_tensor,
+                num_anchors_list,
+                gt_labels,
+                gt_bboxes,
+                gt_rboxes,
+                pad_gt_mask,
+                bg_index,
+                pred_rboxes=None):
+        r"""
+
+        Args:
+            anchor_points (Tensor, float32): pre-defined anchor points, shape(1, L, 2),
+                    "x, y" format
+            stride_tensor (Tensor, float32): stride tensor, shape (1, L, 1)
+            num_anchors_list (List): num of anchors in each level
+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+            gt_rboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
+            bg_index (int): background index
+            pred_rboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 5)
+        Returns:
+            assigned_labels (Tensor): (B, L)
+            assigned_rboxes (Tensor): (B, L, 5)
+            assigned_scores (Tensor): (B, L, C), if pred_rboxes is not None, then output ious
+        """
+
+        _, num_anchors, _ = anchor_points.shape
+        batch_size, num_max_boxes, _ = gt_rboxes.shape
+        if num_max_boxes == 0:
+            assigned_labels = paddle.full(
+                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
+            assigned_rboxes = paddle.zeros([batch_size, num_anchors, 5])
+            assigned_scores = paddle.zeros(
+                [batch_size, num_anchors, self.num_classes])
+            return assigned_labels, assigned_rboxes, assigned_scores
+
+        # get normalized gaussian distribution score and refined distribution score
+        gt_polys = box2corners(gt_rboxes)
+        score, refined_score = self.get_gaussian_distribution_score(
+            anchor_points, gt_rboxes, gt_polys)
+        inside_mask = self.get_rotated_inside_mask(anchor_points, gt_polys,
+                                                   score)
+        regress_ranges = []
+        for num, bound in zip(num_anchors_list, self.boundary):
+            regress_ranges.append(bound.tile((1, num, 1)))
+        regress_ranges = paddle.concat(regress_ranges, axis=1)
+        regress_mask = self.get_inside_range_mask(
+            anchor_points, gt_bboxes, gt_rboxes, stride_tensor, regress_ranges)
+        # [B, n, L]
+        mask_positive = inside_mask * regress_mask * pad_gt_mask
+        refined_score = refined_score * mask_positive - (1. - mask_positive)
+
+        argmax_refined_score = refined_score.argmax(axis=-2)
+        max_refined_score = refined_score.max(axis=-2)
+        assigned_gt_index = argmax_refined_score
+
+        # assigned target
+        batch_ind = paddle.arange(
+            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
+        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
+        assigned_labels = paddle.gather(
+            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
+        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
+        assigned_labels = paddle.where(
+            max_refined_score > 0, assigned_labels,
+            paddle.full_like(assigned_labels, bg_index))
+
+        assigned_rboxes = paddle.gather(
+            gt_rboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
+        assigned_rboxes = assigned_rboxes.reshape([batch_size, num_anchors, 5])
+
+        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
+        ind = list(range(self.num_classes + 1))
+        ind.remove(bg_index)
+        assigned_scores = paddle.index_select(
+            assigned_scores, paddle.to_tensor(ind), axis=-1)
+
+        if self.score_type == 'gaussian':
+            selected_scores = paddle_gather(
+                score, 1, argmax_refined_score.unsqueeze(-2)).squeeze(-2)
+            assigned_scores = assigned_scores * selected_scores.unsqueeze(-1)
+        elif self.score_type == 'iou':
+            assert pred_rboxes is not None, 'If score type is iou, pred_rboxes should not be None'
+            from ext_op import matched_rbox_iou
+            b, l = pred_rboxes.shape[:2]
+            iou_score = matched_rbox_iou(
+                pred_rboxes.reshape((-1, 5)), assigned_rboxes.reshape(
+                    (-1, 5))).reshape((b, l, 1))
+            assigned_scores = assigned_scores * iou_score
+
+        return assigned_labels, assigned_rboxes, assigned_scores 
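Like the task-aligned assigners later in this change, this assigner turns `assigned_labels` into per-class targets by one-hot encoding over num_classes + 1 channels and then dropping the background channel, so background anchors end up with an all-zero score row. A minimal sketch of that step (class count and labels are illustrative):

import paddle
import paddle.nn.functional as F

num_classes, bg_index = 3, 3                        # background uses the extra channel
assigned_labels = paddle.to_tensor([[0, 2, 3]])     # last anchor is background
onehot = F.one_hot(assigned_labels, num_classes + 1)             # [1, 3, 4]
keep = paddle.to_tensor([i for i in range(num_classes + 1) if i != bg_index])
assigned_scores = paddle.index_select(onehot, keep, axis=-1)     # [1, 3, 3]
# rows: [1, 0, 0], [0, 0, 1], [0, 0, 0] -> the background anchor scores nothing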

+ 316 - 0
paddlers/models/ppdet/modeling/assigners/hungarian_assigner.py

@@ -0,0 +1,316 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+try:
+    from scipy.optimize import linear_sum_assignment
+except ImportError:
+    linear_sum_assignment = None
+
+import paddle
+
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['PoseHungarianAssigner', 'PseudoSampler']
+
+
+class AssignResult:
+    """Stores assignments between predicted and truth boxes.
+
+    Attributes:
+        num_gts (int): the number of truth boxes considered when computing this
+            assignment
+
+        gt_inds (LongTensor): for each predicted box indicates the 1-based
+            index of the assigned truth box. 0 means unassigned and -1 means
+            ignore.
+
+        max_overlaps (FloatTensor): the iou between the predicted box and its
+            assigned truth box.
+
+        labels (None | LongTensor): If specified, for each predicted box
+            indicates the category label of the assigned truth box.
+    """
+
+    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
+        self.num_gts = num_gts
+        self.gt_inds = gt_inds
+        self.max_overlaps = max_overlaps
+        self.labels = labels
+        # Interface for possible user-defined properties
+        self._extra_properties = {}
+
+    @property
+    def num_preds(self):
+        """int: the number of predictions in this assignment"""
+        return len(self.gt_inds)
+
+    def set_extra_property(self, key, value):
+        """Set user-defined new property."""
+        assert key not in self.info
+        self._extra_properties[key] = value
+
+    def get_extra_property(self, key):
+        """Get user-defined property."""
+        return self._extra_properties.get(key, None)
+
+    @property
+    def info(self):
+        """dict: a dictionary of info about the object"""
+        basic_info = {
+            'num_gts': self.num_gts,
+            'num_preds': self.num_preds,
+            'gt_inds': self.gt_inds,
+            'max_overlaps': self.max_overlaps,
+            'labels': self.labels,
+        }
+        basic_info.update(self._extra_properties)
+        return basic_info
+
+
+@register
+class PoseHungarianAssigner:
+    """Computes one-to-one matching between predictions and ground truth.
+
+    This class computes an assignment between the targets and the predictions
+    based on the costs. The costs are weighted sum of three components:
+    classification cost, regression L1 cost and regression oks cost. The
+    targets don't include the no_object, so generally there are more
+    predictions than targets. After the one-to-one matching, the un-matched
+    are treated as backgrounds. Thus each query prediction will be assigned
+    with `0` or a positive integer indicating the ground truth index:
+
+    - 0: negative sample, no assigned gt.
+    - positive integer: positive sample, index (1-based) of assigned gt.
+
+    Args:
+        cls_weight (int | float, optional): The scale factor for classification
+            cost. Default 1.0.
+        kpt_weight (int | float, optional): The scale factor for regression
+            L1 cost. Default 1.0.
+        oks_weight (int | float, optional): The scale factor for regression
+            oks cost. Default 1.0.
+    """
+    __inject__ = ['cls_cost', 'kpt_cost', 'oks_cost']
+
+    def __init__(self,
+                 cls_cost='ClassificationCost',
+                 kpt_cost='KptL1Cost',
+                 oks_cost='OksCost'):
+        self.cls_cost = cls_cost
+        self.kpt_cost = kpt_cost
+        self.oks_cost = oks_cost
+
+    def assign(self,
+               cls_pred,
+               kpt_pred,
+               gt_labels,
+               gt_keypoints,
+               gt_areas,
+               img_meta,
+               eps=1e-7):
+        """Computes one-to-one matching based on the weighted costs.
+
+        This method assigns each query prediction to a ground truth or
+        background. The `assigned_gt_inds` with -1 means don't care,
+        0 means negative sample, and positive number is the index (1-based)
+        of assigned gt.
+        The assignment is done in the following steps, the order matters.
+
+        1. assign every prediction to -1
+        2. compute the weighted costs
+        3. do Hungarian matching on CPU based on the costs
+        4. assign all to 0 (background) first, then for each matched pair
+           between predictions and gts, treat this prediction as foreground
+           and assign the corresponding gt index (plus 1) to it.
+
+        Args:
+            cls_pred (Tensor): Predicted classification logits, shape
+                [num_query, num_class].
+            kpt_pred (Tensor): Predicted keypoints with normalized coordinates
+                (x_{i}, y_{i}), which are all in range [0, 1]. Shape
+                [num_query, K*2].
+            gt_labels (Tensor): Label of `gt_keypoints`, shape (num_gt,).
+            gt_keypoints (Tensor): Ground truth keypoints with unnormalized
+                coordinates [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
+                    p^{K}_x, p^{K}_y, p^{K}_v]. Shape [num_gt, K*3].
+            gt_areas (Tensor): Ground truth mask areas, shape (num_gt,).
+            img_meta (dict): Meta information for current image.
+            eps (int | float, optional): A value added to the denominator for
+                numerical stability. Default 1e-7.
+
+        Returns:
+            :obj:`AssignResult`: The assigned result.
+        """
+        num_gts, num_kpts = gt_keypoints.shape[0], kpt_pred.shape[0]
+        if not gt_keypoints.astype('bool').any():
+            num_gts = 0
+
+        # 1. assign -1 by default
+        assigned_gt_inds = paddle.full((num_kpts, ), -1, dtype="int64")
+        assigned_labels = paddle.full((num_kpts, ), -1, dtype="int64")
+        if num_gts == 0 or num_kpts == 0:
+            # No ground truth or keypoints, return empty assignment
+            if num_gts == 0:
+                # No ground truth, assign all to background
+                assigned_gt_inds[:] = 0
+            return AssignResult(
+                num_gts, assigned_gt_inds, None, labels=assigned_labels)
+        img_h, img_w, _ = img_meta['img_shape']
+        factor = paddle.to_tensor(
+            [img_w, img_h, img_w, img_h], dtype=gt_keypoints.dtype).reshape(
+                (1, -1))
+
+        # 2. compute the weighted costs
+        # classification cost
+        cls_cost = self.cls_cost(cls_pred, gt_labels)
+
+        # keypoint regression L1 cost
+        gt_keypoints_reshape = gt_keypoints.reshape((gt_keypoints.shape[0], -1,
+                                                     3))
+        valid_kpt_flag = gt_keypoints_reshape[..., -1]
+        kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
+                                                          2))
+        normalize_gt_keypoints = gt_keypoints_reshape[
+            ..., :2] / factor[:, :2].unsqueeze(0)
+        kpt_cost = self.kpt_cost(kpt_pred_tmp, normalize_gt_keypoints,
+                                 valid_kpt_flag)
+        # keypoint OKS cost
+        kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
+                                                          2))
+        kpt_pred_tmp = kpt_pred_tmp * factor[:, :2].unsqueeze(0)
+        oks_cost = self.oks_cost(kpt_pred_tmp, gt_keypoints_reshape[..., :2],
+                                 valid_kpt_flag, gt_areas)
+        # weighted sum of above three costs
+        cost = cls_cost + kpt_cost + oks_cost
+
+        # 3. do Hungarian matching on CPU using linear_sum_assignment
+        cost = cost.detach().cpu()
+        if linear_sum_assignment is None:
+            raise ImportError('Please run "pip install scipy" '
+                              'to install scipy first.')
+        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
+        matched_row_inds = paddle.to_tensor(matched_row_inds)
+        matched_col_inds = paddle.to_tensor(matched_col_inds)
+
+        # 4. assign backgrounds and foregrounds
+        # assign all indices to backgrounds first
+        assigned_gt_inds[:] = 0
+        # assign foregrounds based on matching results
+        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
+        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds][
+            ..., 0].astype("int64")
+        return AssignResult(
+            num_gts, assigned_gt_inds, None, labels=assigned_labels)
+
+
+class SamplingResult:
+    """Bbox sampling result.
+    """
+
+    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
+                 gt_flags):
+        self.pos_inds = pos_inds
+        self.neg_inds = neg_inds
+        if pos_inds.size > 0:
+            self.pos_bboxes = bboxes[pos_inds]
+            self.neg_bboxes = bboxes[neg_inds]
+            self.pos_is_gt = gt_flags[pos_inds]
+
+            self.num_gts = gt_bboxes.shape[0]
+            self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
+
+            if gt_bboxes.numel() == 0:
+                # hack for index error case
+                assert self.pos_assigned_gt_inds.numel() == 0
+                self.pos_gt_bboxes = paddle.zeros(
+                    gt_bboxes.shape, dtype=gt_bboxes.dtype).reshape((-1, 4))
+            else:
+                if len(gt_bboxes.shape) < 2:
+                    gt_bboxes = gt_bboxes.reshape((-1, 4))
+
+                self.pos_gt_bboxes = paddle.index_select(
+                    gt_bboxes,
+                    self.pos_assigned_gt_inds.astype('int64'),
+                    axis=0)
+
+            if assign_result.labels is not None:
+                self.pos_gt_labels = assign_result.labels[pos_inds]
+            else:
+                self.pos_gt_labels = None
+
+    @property
+    def bboxes(self):
+        """paddle.Tensor: concatenated positive and negative boxes"""
+        return paddle.concat([self.pos_bboxes, self.neg_bboxes])
+
+    def __nice__(self):
+        data = self.info.copy()
+        data['pos_bboxes'] = data.pop('pos_bboxes').shape
+        data['neg_bboxes'] = data.pop('neg_bboxes').shape
+        parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
+        body = '    ' + ',\n    '.join(parts)
+        return '{\n' + body + '\n}'
+
+    @property
+    def info(self):
+        """Returns a dictionary of info about the object."""
+        return {
+            'pos_inds': self.pos_inds,
+            'neg_inds': self.neg_inds,
+            'pos_bboxes': self.pos_bboxes,
+            'neg_bboxes': self.neg_bboxes,
+            'pos_is_gt': self.pos_is_gt,
+            'num_gts': self.num_gts,
+            'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
+        }
+
+
+@register
+class PseudoSampler:
+    """A pseudo sampler that does not do sampling actually."""
+
+    def __init__(self, **kwargs):
+        pass
+
+    def _sample_pos(self, **kwargs):
+        """Sample positive samples."""
+        raise NotImplementedError
+
+    def _sample_neg(self, **kwargs):
+        """Sample negative samples."""
+        raise NotImplementedError
+
+    def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):
+        """Directly returns the positive and negative indices  of samples.
+
+        Args:
+            assign_result (:obj:`AssignResult`): Assigned results
+            bboxes (paddle.Tensor): Bounding boxes
+            gt_bboxes (paddle.Tensor): Ground truth boxes
+
+        Returns:
+            :obj:`SamplingResult`: sampler results
+        """
+        pos_inds = paddle.nonzero(
+            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1)
+        neg_inds = paddle.nonzero(
+            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1)
+        gt_flags = paddle.zeros([bboxes.shape[0]], dtype='int32')
+        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
+                                         assign_result, gt_flags)
+        return sampling_result
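PoseHungarianAssigner leaves the actual one-to-one matching to SciPy's linear_sum_assignment; steps 3 and 4 of assign() reduce to the pattern below (the cost values are illustrative):

import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.2, 0.9],          # cost[i, j]: match query i to gt j
                 [0.7, 0.1],
                 [0.5, 0.6]])
rows, cols = linear_sum_assignment(cost)                     # minimizes the total cost
assigned_gt_inds = np.zeros(cost.shape[0], dtype=np.int64)   # 0 = background
assigned_gt_inds[rows] = cols + 1                            # 1-based gt indices
print(assigned_gt_inds)                                      # [1 2 0]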

+ 275 - 0
paddlers/models/ppdet/modeling/assigners/pose_utils.py

@@ -0,0 +1,275 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.core.workspace import register
+
+__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
+
+
+def masked_fill(x, mask, value):
+    y = paddle.full(x.shape, value, x.dtype)
+    return paddle.where(mask, y, x)
+
+
+@register
+class KptL1Cost(object):
+    """KptL1Cost.
+
+    this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
+
+    Args:
+        weight (int | float, optional): loss_weight.
+    """
+
+    def __init__(self, weight=1.0):
+        self.weight = weight
+
+    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
+        """
+        Args:
+            kpt_pred (Tensor): Predicted keypoints with normalized coordinates
+                (x_{i}, y_{i}), which are all in range [0, 1]. Shape
+                [num_query, K, 2].
+            gt_keypoints (Tensor): Ground truth keypoints with normalized
+                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
+            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
+                Shape [num_gt, K].
+
+        Returns:
+            paddle.Tensor: kpt_cost value with weight.
+        """
+        kpt_cost = []
+        for i in range(len(gt_keypoints)):
+            if gt_keypoints[i].size == 0:
+                kpt_cost.append(kpt_pred.sum() * 0)
+            kpt_pred_tmp = kpt_pred.clone()
+            valid_flag = valid_kpt_flag[i] > 0
+            valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
+                kpt_pred_tmp)
+            if not valid_flag_expand.all():
+                kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
+            cost = F.pairwise_distance(
+                kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
+                gt_keypoints[i].reshape((-1, )).unsqueeze(0),
+                p=1,
+                keepdim=True)
+            avg_factor = paddle.clip(
+                valid_flag.astype('float32').sum() * 2, 1.0)
+            cost = cost / avg_factor
+            kpt_cost.append(cost)
+        kpt_cost = paddle.concat(kpt_cost, axis=1)
+        return kpt_cost * self.weight
+
+
+@register
+class OksCost(object):
+    """OksCost.
+
+    this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
+
+    Args:
+        num_keypoints (int): number of keypoints
+        weight (int | float, optional): loss_weight.
+    """
+
+    def __init__(self, num_keypoints=17, weight=1.0):
+        self.weight = weight
+        if num_keypoints == 17:
+            self.sigmas = np.array(
+                [
+                    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
+                    1.07, .87, .87, .89, .89
+                ],
+                dtype=np.float32) / 10.0
+        elif num_keypoints == 14:
+            self.sigmas = np.array(
+                [
+                    .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
+                    .89, .79, .79
+                ],
+                dtype=np.float32) / 10.0
+        else:
+            raise ValueError(f'Unsupported keypoints number {num_keypoints}')
+
+    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
+        """
+        Args:
+            kpt_pred (Tensor): Predicted keypoints with unnormalized
+                coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
+            gt_keypoints (Tensor): Ground truth keypoints with unnormalized
+                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
+            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
+                Shape [num_gt, K].
+            gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].
+
+        Returns:
+            paddle.Tensor: oks_cost value with weight.
+        """
+        sigmas = paddle.to_tensor(self.sigmas)
+        variances = (sigmas * 2)**2
+
+        oks_cost = []
+        assert len(gt_keypoints) == len(gt_areas)
+        for i in range(len(gt_keypoints)):
+            if gt_keypoints[i].size == 0:
+                oks_cost.append(kpt_pred.sum() * 0)
+            squared_distance = \
+                (kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
+                (kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
+            vis_flag = (valid_kpt_flag[i] > 0).astype('int')
+            vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
+            num_vis_kpt = vis_ind.shape[0]
+            # assert num_vis_kpt > 0
+            if num_vis_kpt == 0:
+                oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
+                continue
+            area = gt_areas[i]
+
+            squared_distance0 = squared_distance / (area * variances * 2)
+            squared_distance0 = paddle.index_select(
+                squared_distance0, vis_ind, axis=1)
+            squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
+                                                                   keepdim=True)
+            oks = squared_distance1 / num_vis_kpt
+            # The 1 is a constant that doesn't change the matching, so omitted.
+            oks_cost.append(-oks)
+        oks_cost = paddle.concat(oks_cost, axis=1)
+        return oks_cost * self.weight
+
+
+@register
+class ClassificationCost:
+    """ClsSoftmaxCost.
+
+     Args:
+         weight (int | float, optional): loss_weight
+    """
+
+    def __init__(self, weight=1.):
+        self.weight = weight
+
+    def __call__(self, cls_pred, gt_labels):
+        """
+        Args:
+            cls_pred (Tensor): Predicted classification logits, shape
+                (num_query, num_class).
+            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
+
+        Returns:
+            paddle.Tensor: cls_cost value with weight
+        """
+        # Following the official DETR repo, instead of the NLL used in the
+        # loss, we approximate the cost by 1 - cls_score[gt_label].
+        # The 1 is a constant that doesn't change the matching,
+        # so it can be omitted.
+        cls_score = cls_pred.softmax(-1)
+        cls_cost = -cls_score[:, gt_labels]
+        return cls_cost * self.weight
+
+
+@register
+class FocalLossCost:
+    """FocalLossCost.
+
+     Args:
+         weight (int | float, optional): loss_weight
+         alpha (int | float, optional): focal_loss alpha
+         gamma (int | float, optional): focal_loss gamma
+         eps (float, optional): default 1e-12
+         binary_input (bool, optional): Whether the input is binary,
+            default False.
+    """
+
+    def __init__(self,
+                 weight=1.,
+                 alpha=0.25,
+                 gamma=2,
+                 eps=1e-12,
+                 binary_input=False):
+        self.weight = weight
+        self.alpha = alpha
+        self.gamma = gamma
+        self.eps = eps
+        self.binary_input = binary_input
+
+    def _focal_loss_cost(self, cls_pred, gt_labels):
+        """
+        Args:
+            cls_pred (Tensor): Predicted classification logits, shape
+                (num_query, num_class).
+            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
+
+        Returns:
+            paddle.Tensor: cls_cost value with weight
+        """
+        if gt_labels.size == 0:
+            return cls_pred.sum() * 0
+        cls_pred = F.sigmoid(cls_pred)
+        neg_cost = -(1 - cls_pred + self.eps).log() * (
+            1 - self.alpha) * cls_pred.pow(self.gamma)
+        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
+            1 - cls_pred).pow(self.gamma)
+
+        cls_cost = paddle.index_select(
+            pos_cost, gt_labels, axis=1) - paddle.index_select(
+                neg_cost, gt_labels, axis=1)
+        return cls_cost * self.weight
+
+    def _mask_focal_loss_cost(self, cls_pred, gt_labels):
+        """
+        Args:
+            cls_pred (Tensor): Predicted classification logits
+                in shape (num_query, d1, ..., dn), dtype=paddle.float32.
+            gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
+                dtype=paddle.long. Labels should be binary.
+
+        Returns:
+            Tensor: Focal cost matrix with weight in shape\
+                (num_query, num_gt).
+        """
+        cls_pred = cls_pred.flatten(1)
+        gt_labels = gt_labels.flatten(1).float()
+        n = cls_pred.shape[1]
+        cls_pred = F.sigmoid(cls_pred)
+        neg_cost = -(1 - cls_pred + self.eps).log() * (
+            1 - self.alpha) * cls_pred.pow(self.gamma)
+        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
+            1 - cls_pred).pow(self.gamma)
+
+        cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
+            paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
+        return cls_cost / n * self.weight
+
+    def __call__(self, cls_pred, gt_labels):
+        """
+        Args:
+            cls_pred (Tensor): Predicted classification logits.
+            gt_labels (Tensor): Labels.
+
+        Returns:
+            Tensor: Focal cost matrix with weight in shape\
+                (num_query, num_gt).
+        """
+        if self.binary_input:
+            return self._mask_focal_loss_cost(cls_pred, gt_labels)
+        else:
+            return self._focal_loss_cost(cls_pred, gt_labels)
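OksCost negates the usual OKS similarity so that a better pose match costs less. A minimal NumPy sketch of the term it accumulates for a single gt, assuming two visible keypoints (all numbers are illustrative):

import numpy as np

sigmas = np.array([.26, .25], dtype=np.float32) / 10.0
variances = (sigmas * 2) ** 2                      # per-keypoint falloff, as in OksCost
squared_distance = np.array([[0.4, 0.9]])          # pred-vs-gt squared distances, [num_query, K]
area = 50.0                                        # gt mask area
oks = np.exp(-squared_distance / (area * variances * 2)).sum(axis=1) / 2
oks_cost = -oks                                    # negate: higher OKS -> lower matching cost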

+ 164 - 0
paddlers/models/ppdet/modeling/assigners/rotated_task_aligned_assigner.py

@@ -0,0 +1,164 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.core.workspace import register
+from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes
+from .utils import gather_topk_anchors, compute_max_iou_anchor
+
+__all__ = ['RotatedTaskAlignedAssigner']
+
+
+@register
+class RotatedTaskAlignedAssigner(nn.Layer):
+    """TOOD: Task-aligned One-stage Object Detection
+    """
+
+    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
+        super(RotatedTaskAlignedAssigner, self).__init__()
+        self.topk = topk
+        self.alpha = alpha
+        self.beta = beta
+        self.eps = eps
+
+    @paddle.no_grad()
+    def forward(self,
+                pred_scores,
+                pred_bboxes,
+                anchor_points,
+                num_anchors_list,
+                gt_labels,
+                gt_bboxes,
+                pad_gt_mask,
+                bg_index,
+                gt_scores=None):
+        r"""This code is based on
+            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
+
+        The assignment is done in following steps
+        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
+        2. select top-k bbox as candidates for each gt
+        3. limit the positive sample's center in gt (because the anchor-free detector
+           only can predict positive distance)
+        4. if an anchor box is assigned to multiple gts, the one with the
+           highest iou will be selected.
+        Args:
+            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
+            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5)
+            anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format
+            num_anchors_list (List): num of anchors in each level, shape(L)
+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
+            bg_index (int): background index
+            gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
+        Returns:
+            assigned_labels (Tensor): (B, L)
+            assigned_bboxes (Tensor): (B, L, 5)
+            assigned_scores (Tensor): (B, L, C)
+        """
+        assert pred_scores.ndim == pred_bboxes.ndim
+        assert gt_labels.ndim == gt_bboxes.ndim and \
+               gt_bboxes.ndim == 3
+
+        batch_size, num_anchors, num_classes = pred_scores.shape
+        _, num_max_boxes, _ = gt_bboxes.shape
+
+        # negative batch
+        if num_max_boxes == 0:
+            assigned_labels = paddle.full(
+                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
+            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5])
+            assigned_scores = paddle.zeros(
+                [batch_size, num_anchors, num_classes])
+            return assigned_labels, assigned_bboxes, assigned_scores
+
+        # compute iou between gt and pred bbox, [B, n, L]
+        ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
+        ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious)
+        ious.stop_gradient = True
+        # gather pred bboxes class score
+        pred_scores = pred_scores.transpose([0, 2, 1])
+        batch_ind = paddle.arange(
+            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
+        gt_labels_ind = paddle.stack(
+            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
+            axis=-1)
+        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
+        # compute alignment metrics, [B, n, L]
+        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
+            self.beta)
+
+        # check the positive sample's center in gt, [B, n, L]
+        is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes)
+
+        # select topk largest alignment metrics pred bbox as candidates
+        # for each gt, [B, n, L]
+        is_in_topk = gather_topk_anchors(
+            alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
+
+        # select positive sample, [B, n, L]
+        mask_positive = is_in_topk * is_in_gts * pad_gt_mask
+
+        # if an anchor box is assigned to multiple gts,
+        # the one with the highest iou will be selected, [B, n, L]
+        mask_positive_sum = mask_positive.sum(axis=-2)
+        if mask_positive_sum.max() > 1:
+            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
+                [1, num_max_boxes, 1])
+            is_max_iou = compute_max_iou_anchor(ious)
+            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
+                                         mask_positive)
+            mask_positive_sum = mask_positive.sum(axis=-2)
+        assigned_gt_index = mask_positive.argmax(axis=-2)
+
+        # assigned target
+        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
+        assigned_labels = paddle.gather(
+            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
+        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
+        assigned_labels = paddle.where(
+            mask_positive_sum > 0, assigned_labels,
+            paddle.full_like(assigned_labels, bg_index))
+
+        assigned_bboxes = paddle.gather(
+            gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
+        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5])
+
+        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
+        ind = list(range(num_classes + 1))
+        ind.remove(bg_index)
+        assigned_scores = paddle.index_select(
+            assigned_scores, paddle.to_tensor(ind), axis=-1)
+        # rescale alignment metrics
+        alignment_metrics *= mask_positive
+        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
+        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
+                                                           keepdim=True)
+        alignment_metrics = alignment_metrics / (
+            max_metrics_per_instance + self.eps) * max_ious_per_instance
+        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
+        assigned_scores = assigned_scores * alignment_metrics
+
+        assigned_bboxes.stop_gradient = True
+        assigned_scores.stop_gradient = True
+        assigned_labels.stop_gradient = True
+        return assigned_labels, assigned_bboxes, assigned_scores
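RotatedTaskAlignedAssigner ranks candidates with the same task-alignment metric as TOOD, t = s**alpha * u**beta, where s is the predicted score of the gt class and u the rotated IoU. A small sketch with the default alpha=1.0, beta=6.0 (values are illustrative):

import paddle

scores = paddle.to_tensor([0.80, 0.30])   # gt-class scores at two candidate anchors
ious = paddle.to_tensor([0.70, 0.90])     # rotated IoU of their predictions with the gt
alpha, beta = 1.0, 6.0
alignment_metrics = scores.pow(alpha) * ious.pow(beta)
# -> approx. [0.094, 0.159]: the higher-IoU anchor wins despite its lower score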

+ 1 - 1
paddlers/models/ppdet/modeling/assigners/simota_assigner.py

@@ -236,7 +236,7 @@ class SimOTAAssigner(object):
         )] = match_fg_mask_inmatrix

         assigned_gt_inds[match_fg_mask_inall.astype(
-            np.bool)] = match_gt_inds_to_fg + 1
+            np.bool_)] = match_gt_inds_to_fg + 1
 
 
         pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
             = self.get_sample(assigned_gt_inds, gt_bboxes.numpy())
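The np.bool alias was deprecated in NumPy 1.20 and removed in 1.24; np.bool_ is the NumPy scalar type, so the boolean-mask indexing keeps working on newer NumPy releases:

import numpy as np

assigned_gt_inds = np.zeros(4, dtype=np.int64)
match_fg_mask_inall = np.array([0, 1, 0, 1])
assigned_gt_inds[match_fg_mask_inall.astype(np.bool_)] = 7    # boolean-mask assignment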

+ 38 - 4
paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py

@@ -28,17 +28,47 @@ from .utils import (gather_topk_anchors, check_points_inside_bboxes,
 __all__ = ['TaskAlignedAssigner']


+def is_close_gt(anchor, gt, stride_lst, max_dist=2.0, alpha=2.):
+    """Calculate distance ratio of box1 and box2 in batch for larger stride
+        anchors dist/stride to promote the survive of large distance match
+    Args:
+        anchor (Tensor): box with the shape [L, 2]
+        gt (Tensor): box with the shape [N, M2, 4]
+    Return:
+        dist (Tensor): dist ratio between box1 and box2 with the shape [N, M1, M2]
+    """
+    center1 = anchor.unsqueeze(0)
+    center2 = (gt[..., :2] + gt[..., -2:]) / 2.
+    center1 = center1.unsqueeze(1)  # [N, M1, 2] -> [N, 1, M1, 2]
+    center2 = center2.unsqueeze(2)  # [N, M2, 2] -> [N, M2, 1, 2]
+
+    stride = paddle.concat([
+        paddle.full([x], 32 / pow(2, idx)) for idx, x in enumerate(stride_lst)
+    ]).unsqueeze(0).unsqueeze(0)
+    dist = paddle.linalg.norm(center1 - center2, p=2, axis=-1) / stride
+    dist_ratio = dist
+    dist_ratio[dist < max_dist] = 1.
+    dist_ratio[dist >= max_dist] = 0.
+    return dist_ratio
+
+
 @register
 class TaskAlignedAssigner(nn.Layer):
     """TOOD: Task-aligned One-stage Object Detection
     """

-    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
+    def __init__(self,
+                 topk=13,
+                 alpha=1.0,
+                 beta=6.0,
+                 eps=1e-9,
+                 is_close_gt=False):
         super(TaskAlignedAssigner, self).__init__()
         self.topk = topk
         self.alpha = alpha
         self.beta = beta
         self.eps = eps
+        self.is_close_gt = is_close_gt
 
 
     @paddle.no_grad()
     def forward(self,
@@ -90,7 +120,8 @@ class TaskAlignedAssigner(nn.Layer):
             assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
             assigned_scores = paddle.zeros(
                 [batch_size, num_anchors, num_classes])
-            return assigned_labels, assigned_bboxes, assigned_scores
+            mask_positive = paddle.zeros([batch_size, 1, num_anchors])
+            return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
 
 
         # compute iou between gt and pred bbox, [B, n, L]
         ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
@@ -107,7 +138,10 @@ class TaskAlignedAssigner(nn.Layer):
             self.beta)

         # check the positive sample's center in gt, [B, n, L]
-        is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
+        if self.is_close_gt:
+            is_in_gts = is_close_gt(anchor_points, gt_bboxes, num_anchors_list)
+        else:
+            is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
 
 
         # select topk largest alignment metrics pred bbox as candidates
         # for each gt, [B, n, L]
@@ -157,4 +191,4 @@ class TaskAlignedAssigner(nn.Layer):
         alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
         assigned_scores = assigned_scores * alignment_metrics

-        return assigned_labels, assigned_bboxes, assigned_scores
+        return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
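The new is_close_gt path replaces the strict inside-the-box test with a distance test measured in units of each level's stride; the helper rebuilds the per-anchor stride vector as 32 / 2**idx from num_anchors_list. A minimal sketch of that vector (an assumed three-level pyramid):

import paddle

num_anchors_list = [4, 2, 1]           # anchors per level; level idx maps to stride 32 / 2**idx
stride = paddle.concat([
    paddle.full([n], 32 / pow(2, idx)) for idx, n in enumerate(num_anchors_list)
])
print(stride.numpy())                   # [32. 32. 32. 32. 16. 16.  8.]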

+ 182 - 0
paddlers/models/ppdet/modeling/assigners/task_aligned_assigner_cr.py

@@ -0,0 +1,182 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlers.models.ppdet.core.workspace import register
+from ..bbox_utils import batch_iou_similarity
+from .utils import (gather_topk_anchors, check_points_inside_bboxes,
+                    compute_max_iou_anchor)
+
+__all__ = ['TaskAlignedAssigner_CR']
+
+
+@register
+class TaskAlignedAssigner_CR(nn.Layer):
+    """TOOD: Task-aligned One-stage Object Detection with Center R
+    """
+
+    def __init__(self,
+                 topk=13,
+                 alpha=1.0,
+                 beta=6.0,
+                 center_radius=None,
+                 eps=1e-9):
+        super(TaskAlignedAssigner_CR, self).__init__()
+        self.topk = topk
+        self.alpha = alpha
+        self.beta = beta
+        self.center_radius = center_radius
+        self.eps = eps
+
+    @paddle.no_grad()
+    def forward(self,
+                pred_scores,
+                pred_bboxes,
+                anchor_points,
+                stride_tensor,
+                gt_labels,
+                gt_bboxes,
+                pad_gt_mask,
+                bg_index,
+                gt_scores=None):
+        r"""This code is based on
+            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
+
+        The assignment is done in following steps
+        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
+        2. select top-k bbox as candidates for each gt
+        3. limit the positive sample's center in gt (because the anchor-free detector
+           only can predict positive distance)
+        4. if an anchor box is assigned to multiple gts, the one with the
+           highest iou will be selected.
+        Args:
+            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
+            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
+            anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
+            stride_tensor (Tensor, float32): stride of feature map, shape(L, 1)
+            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
+            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
+            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
+            bg_index (int): background index
+            gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
+        Returns:
+            assigned_labels (Tensor): (B, L)
+            assigned_bboxes (Tensor): (B, L, 4)
+            assigned_scores (Tensor): (B, L, C)
+        """
+        assert pred_scores.ndim == pred_bboxes.ndim
+        assert gt_labels.ndim == gt_bboxes.ndim and \
+               gt_bboxes.ndim == 3
+
+        batch_size, num_anchors, num_classes = pred_scores.shape
+        _, num_max_boxes, _ = gt_bboxes.shape
+
+        # negative batch
+        if num_max_boxes == 0:
+            assigned_labels = paddle.full(
+                [batch_size, num_anchors], bg_index, dtype='int32')
+            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
+            assigned_scores = paddle.zeros(
+                [batch_size, num_anchors, num_classes])
+            mask_positive = paddle.zeros([batch_size, 1, num_anchors])
+            return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
+
+        # compute iou between gt and pred bbox, [B, n, L]
+        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
+        # gather pred bboxes class score
+        pred_scores = pred_scores.transpose([0, 2, 1])
+        batch_ind = paddle.arange(
+            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
+        gt_labels_ind = paddle.stack(
+            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
+            axis=-1)
+        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
+        # compute alignment metrics, [B, n, L]
+        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
+            self.beta) * pad_gt_mask
+
+        # select positive sample, [B, n, L]
+        if self.center_radius is None:
+            # check the positive sample's center in gt, [B, n, L]
+            is_in_gts = check_points_inside_bboxes(
+                anchor_points, gt_bboxes, sm_use=True)
+            # select topk largest alignment metrics pred bbox as candidates
+            # for each gt, [B, n, L]
+            mask_positive = gather_topk_anchors(
+                alignment_metrics, self.topk, topk_mask=pad_gt_mask) * is_in_gts
+        else:
+            is_in_gts, is_in_center = check_points_inside_bboxes(
+                anchor_points,
+                gt_bboxes,
+                stride_tensor * self.center_radius,
+                sm_use=True)
+            is_in_gts *= pad_gt_mask
+            is_in_center *= pad_gt_mask
+            candidate_metrics = paddle.where(
+                is_in_gts.sum(-1, keepdim=True) == 0,
+                alignment_metrics + is_in_center,
+                alignment_metrics)
+            mask_positive = gather_topk_anchors(
+                candidate_metrics, self.topk,
+                topk_mask=pad_gt_mask) * paddle.cast((is_in_center > 0) |
+                                                     (is_in_gts > 0), 'float32')
+
+        # if an anchor box is assigned to multiple gts,
+        # the one with the highest iou will be selected, [B, n, L]
+        mask_positive_sum = mask_positive.sum(axis=-2)
+        if mask_positive_sum.max() > 1:
+            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
+                [1, num_max_boxes, 1])
+            is_max_iou = compute_max_iou_anchor(ious * mask_positive)
+            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
+                                         mask_positive)
+            mask_positive_sum = mask_positive.sum(axis=-2)
+        assigned_gt_index = mask_positive.argmax(axis=-2)
+
+        # assigned target
+        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
+        assigned_labels = paddle.gather(
+            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
+        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
+        assigned_labels = paddle.where(
+            mask_positive_sum > 0, assigned_labels,
+            paddle.full_like(assigned_labels, bg_index))
+
+        assigned_bboxes = paddle.gather(
+            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
+        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
+
+        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
+        ind = list(range(num_classes + 1))
+        ind.remove(bg_index)
+        assigned_scores = paddle.index_select(
+            assigned_scores, paddle.to_tensor(ind), axis=-1)
+        # rescale alignment metrics
+        alignment_metrics *= mask_positive
+        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
+        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
+                                                           keepdim=True)
+        alignment_metrics = alignment_metrics / (
+            max_metrics_per_instance + self.eps) * max_ious_per_instance
+        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
+        assigned_scores = assigned_scores * alignment_metrics
+
+        return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
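When center_radius is set, TaskAlignedAssigner_CR keeps gts that contain no anchor center from going unmatched: for those gts only, the in-radius mask is added to the alignment metric before the top-k selection. A minimal sketch of that paddle.where step (all masks are illustrative):

import paddle

alignment_metrics = paddle.to_tensor([[[0.5, 0.2, 0.1]]])   # [B=1, n=1, L=3]
is_in_gts = paddle.to_tensor([[[0., 0., 0.]]])              # no anchor center inside this gt
is_in_center = paddle.to_tensor([[[1., 1., 0.]]])           # two anchors within the radius
candidate_metrics = paddle.where(
    is_in_gts.sum(-1, keepdim=True) == 0,                   # only gts with no inside anchor
    alignment_metrics + is_in_center,                        # boost the in-radius anchors
    alignment_metrics)
# -> [[[1.5, 1.2, 0.1]]]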

+ 93 - 0
paddlers/models/ppdet/modeling/assigners/uniform_assigner.py

@@ -0,0 +1,93 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlers.models.ppdet.core.workspace import register
+
+from paddlers.models.ppdet.modeling.bbox_utils import batch_bbox_overlaps
+from paddlers.models.ppdet.modeling.transformers import bbox_xyxy_to_cxcywh
+
+__all__ = ['UniformAssigner']
+
+
+def batch_p_dist(x, y, p=2):
+    """
+    calculate pairwise p_dist, the first index of x and y are batch
+    return [x.shape[0], y.shape[0]]
+    """
+    x = x.unsqueeze(1)
+    diff = x - y
+    return paddle.norm(diff, p=p, axis=list(range(2, diff.dim())))
+
+
+@register
+class UniformAssigner(nn.Layer):
+    def __init__(self, pos_ignore_thr, neg_ignore_thr, match_times=4):
+        super(UniformAssigner, self).__init__()
+        self.pos_ignore_thr = pos_ignore_thr
+        self.neg_ignore_thr = neg_ignore_thr
+        self.match_times = match_times
+
+    def forward(self, bbox_pred, anchor, gt_bboxes, gt_labels=None):
+        num_bboxes = bbox_pred.shape[0]
+        num_gts = gt_bboxes.shape[0]
+        match_labels = paddle.full([num_bboxes], -1, dtype=paddle.int32)
+
+        pred_ious = batch_bbox_overlaps(bbox_pred, gt_bboxes)
+        pred_max_iou = pred_ious.max(axis=1)
+        neg_ignore = pred_max_iou > self.neg_ignore_thr
+        # exclude potential ignored neg samples first, deal with pos samples later
+        #match_labels: -2(ignore), -1(neg) or >=0(pos_inds)
+        match_labels = paddle.where(neg_ignore,
+                                    paddle.full_like(match_labels, -2),
+                                    match_labels)
+
+        bbox_pred_c = bbox_xyxy_to_cxcywh(bbox_pred)
+        anchor_c = bbox_xyxy_to_cxcywh(anchor)
+        gt_bboxes_c = bbox_xyxy_to_cxcywh(gt_bboxes)
+        bbox_pred_dist = batch_p_dist(bbox_pred_c, gt_bboxes_c, p=1)
+        anchor_dist = batch_p_dist(anchor_c, gt_bboxes_c, p=1)
+
+        top_pred = bbox_pred_dist.topk(
+            k=self.match_times, axis=0, largest=False)[1]
+        top_anchor = anchor_dist.topk(
+            k=self.match_times, axis=0, largest=False)[1]
+
+        tar_pred = paddle.arange(num_gts).expand([self.match_times, num_gts])
+        tar_anchor = paddle.arange(num_gts).expand([self.match_times, num_gts])
+        pos_places = paddle.concat([top_pred, top_anchor]).reshape([-1])
+        pos_inds = paddle.concat([tar_pred, tar_anchor]).reshape([-1])
+
+        pos_anchor = anchor[pos_places]
+        pos_tar_bbox = gt_bboxes[pos_inds]
+        pos_ious = batch_bbox_overlaps(
+            pos_anchor, pos_tar_bbox, is_aligned=True)
+        pos_ignore = pos_ious < self.pos_ignore_thr
+        pos_inds = paddle.where(pos_ignore,
+                                paddle.full_like(pos_inds, -2), pos_inds)
+        match_labels[pos_places] = pos_inds
+        match_labels.stop_gradient = True
+        pos_keep = ~pos_ignore
+
+        if pos_keep.sum() > 0:
+            pos_places_keep = pos_places[pos_keep]
+            pos_bbox_pred = bbox_pred[pos_places_keep].reshape([-1, 4])
+            pos_bbox_tar = pos_tar_bbox[pos_keep].reshape([-1, 4]).detach()
+        else:
+            pos_bbox_pred = None
+            pos_bbox_tar = None
+
+        return match_labels, pos_bbox_pred, pos_bbox_tar
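UniformAssigner picks, for every gt, the match_times predictions and anchors with the smallest L1 distance in cxcywh space; batch_p_dist is what builds that distance matrix. A minimal sketch (boxes are illustrative):

import paddle

pred_c = paddle.to_tensor([[0., 0., 2., 2.],
                           [4., 4., 2., 2.]])       # (num_pred, 4), cxcywh
gt_c = paddle.to_tensor([[1., 1., 2., 2.]])         # (num_gt, 4), cxcywh
diff = pred_c.unsqueeze(1) - gt_c                   # (num_pred, num_gt, 4)
dist = paddle.norm(diff, p=1, axis=-1)              # (num_pred, num_gt) -> [[2.], [6.]]
top_idx = dist.topk(k=1, axis=0, largest=False)[1]  # closest prediction per gt -> [[0]]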

+ 8 - 3
paddlers/models/ppdet/modeling/assigners/utils.py

@@ -108,7 +108,8 @@ def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
 def check_points_inside_bboxes(points,
                                bboxes,
                                center_radius_tensor=None,
-                               eps=1e-9):
+                               eps=1e-9,
+                               sm_use=False):
     r"""
     r"""
     Args:
     Args:
         points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
         points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
@@ -139,8 +140,12 @@ def check_points_inside_bboxes(points,
         b = (cy + center_radius_tensor) - y
         delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
         is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
-        return (paddle.logical_and(is_in_bboxes, is_in_center),
-                paddle.logical_or(is_in_bboxes, is_in_center))
+        if sm_use:
+            return is_in_bboxes.astype(bboxes.dtype), is_in_center.astype(
+                bboxes.dtype)
+        else:
+            return (paddle.logical_and(is_in_bboxes, is_in_center),
+                    paddle.logical_or(is_in_bboxes, is_in_center))
 
 
     return is_in_bboxes.astype(bboxes.dtype)
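check_points_inside_bboxes decides membership from the left/top/right/bottom deltas: a point is inside a box only if all four deltas are positive. A minimal sketch of that test (points and boxes are illustrative):

import paddle

points = paddle.to_tensor([[1., 1.], [5., 5.]])     # [L, 2]
bboxes = paddle.to_tensor([[0., 0., 2., 2.]])       # [n, 4], xmin, ymin, xmax, ymax
xy = points.unsqueeze(1)                            # [L, 1, 2]
lt = xy - bboxes[:, :2]                             # distances to the left/top edges
rb = bboxes[:, 2:] - xy                             # distances to the right/bottom edges
delta_ltrb = paddle.concat([lt, rb], axis=-1)       # [L, n, 4]
is_in_bboxes = delta_ltrb.min(axis=-1) > 1e-9       # [L, n] -> [[True], [False]]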
 
 

+ 2 - 0
paddlers/models/ppdet/modeling/backbones/__init__.py

@@ -34,6 +34,7 @@ from . import csp_darknet
 from . import convnext
 from . import vision_transformer
 from . import mobileone
+from . import trans_encoder
 
 
 from .vgg import *
 from .resnet import *
@@ -58,3 +59,4 @@ from .convnext import *
 from .vision_transformer import *
 from .vision_transformer import *
 from .mobileone import *
+from .trans_encoder import *

+ 49 - 9
paddlers/models/ppdet/modeling/backbones/dla.py

@@ -19,7 +19,7 @@ from paddlers.models.ppdet.core.workspace import register, serializable
 from paddlers.models.ppdet.modeling.layers import ConvNormLayer
 from ..shape_spec import ShapeSpec

-DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512])}
+DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512]), }


 class BasicBlock(nn.Layer):
@@ -157,17 +157,25 @@ class DLA(nn.Layer):
     DLA, see https://arxiv.org/pdf/1707.06484.pdf

     Args:
-        depth (int): DLA depth, should be 34.
+        depth (int): DLA depth, only 34 is supported for now.
         residual_root (bool): whether use a reidual layer in the root block
-
+        pre_img (bool): add pre_img, only used in CenterTrack
+        pre_hm (bool): add pre_hm, only used in CenterTrack
     """
     """
 
 
-    def __init__(self, depth=34, residual_root=False):
+    def __init__(self,
+                 depth=34,
+                 residual_root=False,
+                 pre_img=False,
+                 pre_hm=False):
         super(DLA, self).__init__()
-        levels, channels = DLA_cfg[depth]
+        assert depth == 34, 'Only support DLA with depth of 34 now.'
         if depth == 34:
             block = BasicBlock
+        levels, channels = DLA_cfg[depth]
         self.channels = channels
+        self.num_levels = len(levels)
+
         self.base_layer = nn.Sequential(
             ConvNormLayer(
                 3,
@@ -213,6 +221,29 @@ class DLA(nn.Layer):
             level_root=True,
             root_residual=residual_root)

+        if pre_img:
+            self.pre_img_layer = nn.Sequential(
+                ConvNormLayer(
+                    3,
+                    channels[0],
+                    filter_size=7,
+                    stride=1,
+                    bias_on=False,
+                    norm_decay=None),
+                nn.ReLU())
+        if pre_hm:
+            self.pre_hm_layer = nn.Sequential(
+                ConvNormLayer(
+                    1,
+                    channels[0],
+                    filter_size=7,
+                    stride=1,
+                    bias_on=False,
+                    norm_decay=None),
+                nn.ReLU())
+        self.pre_img = pre_img
+        self.pre_hm = pre_hm
+
     def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
         modules = []
         for i in range(conv_num):
@@ -230,13 +261,22 @@ class DLA(nn.Layer):
 
 
     @property
     def out_shape(self):
-        return [ShapeSpec(channels=self.channels[i]) for i in range(6)]
+        return [
+            ShapeSpec(channels=self.channels[i]) for i in range(self.num_levels)
+        ]
 
 
     def forward(self, inputs):
         outs = []
-        im = inputs['image']
-        feats = self.base_layer(im)
-        for i in range(6):
+        feats = self.base_layer(inputs['image'])
+
+        if self.pre_img and 'pre_image' in inputs and inputs[
+                'pre_image'] is not None:
+            feats = feats + self.pre_img_layer(inputs['pre_image'])
+
+        if self.pre_hm and 'pre_hm' in inputs and inputs['pre_hm'] is not None:
+            feats = feats + self.pre_hm_layer(inputs['pre_hm'])
+
+        for i in range(self.num_levels):
             feats = getattr(self, 'level{}'.format(i))(feats)
             feats = getattr(self, 'level{}'.format(i))(feats)
             outs.append(feats)
             outs.append(feats)
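
The new pre_img/pre_hm branches embed the previous frame and its center heatmap to the same width as the base features and fuse them by element-wise addition, which is how CenterTrack conditions the current frame on the previous one. A minimal sketch of that fusion pattern, with plain Conv2D layers standing in for ConvNormLayer and made-up shapes:

    import paddle
    import paddle.nn as nn

    base_ch = 16                                     # channels[0] for DLA-34
    base_layer    = nn.Sequential(nn.Conv2D(3, base_ch, 7, padding=3), nn.ReLU())
    pre_img_layer = nn.Sequential(nn.Conv2D(3, base_ch, 7, padding=3), nn.ReLU())
    pre_hm_layer  = nn.Sequential(nn.Conv2D(1, base_ch, 7, padding=3), nn.ReLU())

    image   = paddle.rand([2, 3, 128, 128])          # current frame
    pre_img = paddle.rand([2, 3, 128, 128])          # previous frame
    pre_hm  = paddle.rand([2, 1, 128, 128])          # previous-frame center heatmap

    feats = base_layer(image)
    feats = feats + pre_img_layer(pre_img)           # only when pre_img=True
    feats = feats + pre_hm_layer(pre_hm)             # only when pre_hm=True
    print(feats.shape)                               # [2, 16, 128, 128]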
 
 

+ 144 - 2
paddlers/models/ppdet/modeling/backbones/hrnet.py

@@ -37,6 +37,7 @@ class ConvNormLayer(nn.Layer):
                  norm_type='bn',
                  norm_type='bn',
                  norm_groups=32,
                  norm_groups=32,
                  use_dcn=False,
                  use_dcn=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=False,
                  freeze_norm=False,
                  act=None,
                  act=None,
@@ -66,6 +67,7 @@ class ConvNormLayer(nn.Layer):
         if norm_type in ['bn', 'sync_bn']:
         if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(
             self.norm = nn.BatchNorm2D(
                 ch_out,
                 ch_out,
+                momentum=norm_momentum,
                 weight_attr=param_attr,
                 weight_attr=param_attr,
                 bias_attr=bias_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
                 use_global_stats=global_stats)
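
The new norm_momentum argument is threaded through every ConvNormLayer down to nn.BatchNorm2D. In Paddle the running statistics are updated as running = momentum * running + (1 - momentum) * batch, so the default 0.9 tracks batch statistics faster than a value such as 0.997. A tiny sketch, with illustrative numbers only:

    import paddle
    import paddle.nn as nn

    # momentum here is exactly what hrnet.py forwards as norm_momentum
    bn = nn.BatchNorm2D(64, momentum=0.9)
    x = paddle.rand([2, 64, 32, 32])
    y = bn(x)          # in train mode this also updates the running statistics
    print(y.shape)     # [2, 64, 32, 32]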
@@ -93,6 +95,7 @@ class Layer1(nn.Layer):
     def __init__(self,
     def __init__(self,
                  num_channels,
                  num_channels,
                  has_se=False,
                  has_se=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -109,6 +112,7 @@ class Layer1(nn.Layer):
                     has_se=has_se,
                     has_se=has_se,
                     stride=1,
                     stride=1,
                     downsample=True if i == 0 else False,
                     downsample=True if i == 0 else False,
+                    norm_momentum=norm_momentum,
                     norm_decay=norm_decay,
                     norm_decay=norm_decay,
                     freeze_norm=freeze_norm,
                     freeze_norm=freeze_norm,
                     name=name + '_' + str(i + 1)))
                     name=name + '_' + str(i + 1)))
@@ -125,6 +129,7 @@ class TransitionLayer(nn.Layer):
     def __init__(self,
     def __init__(self,
                  in_channels,
                  in_channels,
                  out_channels,
                  out_channels,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -144,6 +149,7 @@ class TransitionLayer(nn.Layer):
                             ch_in=in_channels[i],
                             ch_in=in_channels[i],
                             ch_out=out_channels[i],
                             ch_out=out_channels[i],
                             filter_size=3,
                             filter_size=3,
+                            norm_momentum=norm_momentum,
                             norm_decay=norm_decay,
                             norm_decay=norm_decay,
                             freeze_norm=freeze_norm,
                             freeze_norm=freeze_norm,
                             act='relu',
                             act='relu',
@@ -156,6 +162,7 @@ class TransitionLayer(nn.Layer):
                         ch_out=out_channels[i],
                         ch_out=out_channels[i],
                         filter_size=3,
                         filter_size=3,
                         stride=2,
                         stride=2,
+                        norm_momentum=norm_momentum,
                         norm_decay=norm_decay,
                         norm_decay=norm_decay,
                         freeze_norm=freeze_norm,
                         freeze_norm=freeze_norm,
                         act='relu',
                         act='relu',
@@ -181,6 +188,7 @@ class Branches(nn.Layer):
                  in_channels,
                  in_channels,
                  out_channels,
                  out_channels,
                  has_se=False,
                  has_se=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -197,6 +205,7 @@ class Branches(nn.Layer):
                         num_channels=in_ch,
                         num_channels=in_ch,
                         num_filters=out_channels[i],
                         num_filters=out_channels[i],
                         has_se=has_se,
                         has_se=has_se,
+                        norm_momentum=norm_momentum,
                         norm_decay=norm_decay,
                         norm_decay=norm_decay,
                         freeze_norm=freeze_norm,
                         freeze_norm=freeze_norm,
                         name=name + '_branch_layer_' + str(i + 1) + '_' +
                         name=name + '_branch_layer_' + str(i + 1) + '_' +
@@ -221,6 +230,7 @@ class BottleneckBlock(nn.Layer):
                  has_se,
                  has_se,
                  stride=1,
                  stride=1,
                  downsample=False,
                  downsample=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -233,6 +243,7 @@ class BottleneckBlock(nn.Layer):
             ch_in=num_channels,
             ch_in=num_channels,
             ch_out=num_filters,
             ch_out=num_filters,
             filter_size=1,
             filter_size=1,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             act="relu",
             act="relu",
@@ -242,6 +253,7 @@ class BottleneckBlock(nn.Layer):
             ch_out=num_filters,
             ch_out=num_filters,
             filter_size=3,
             filter_size=3,
             stride=stride,
             stride=stride,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             act="relu",
             act="relu",
@@ -250,6 +262,7 @@ class BottleneckBlock(nn.Layer):
             ch_in=num_filters,
             ch_in=num_filters,
             ch_out=num_filters * 4,
             ch_out=num_filters * 4,
             filter_size=1,
             filter_size=1,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             act=None,
             act=None,
@@ -260,6 +273,7 @@ class BottleneckBlock(nn.Layer):
                 ch_in=num_channels,
                 ch_in=num_channels,
                 ch_out=num_filters * 4,
                 ch_out=num_filters * 4,
                 filter_size=1,
                 filter_size=1,
+                norm_momentum=norm_momentum,
                 norm_decay=norm_decay,
                 norm_decay=norm_decay,
                 freeze_norm=freeze_norm,
                 freeze_norm=freeze_norm,
                 act=None,
                 act=None,
@@ -296,6 +310,7 @@ class BasicBlock(nn.Layer):
                  stride=1,
                  stride=1,
                  has_se=False,
                  has_se=False,
                  downsample=False,
                  downsample=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -307,6 +322,7 @@ class BasicBlock(nn.Layer):
             ch_in=num_channels,
             ch_in=num_channels,
             ch_out=num_filters,
             ch_out=num_filters,
             filter_size=3,
             filter_size=3,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             stride=stride,
             stride=stride,
@@ -316,6 +332,7 @@ class BasicBlock(nn.Layer):
             ch_in=num_filters,
             ch_in=num_filters,
             ch_out=num_filters,
             ch_out=num_filters,
             filter_size=3,
             filter_size=3,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             stride=1,
             stride=1,
@@ -327,6 +344,7 @@ class BasicBlock(nn.Layer):
                 ch_in=num_channels,
                 ch_in=num_channels,
                 ch_out=num_filters * 4,
                 ch_out=num_filters * 4,
                 filter_size=1,
                 filter_size=1,
+                norm_momentum=norm_momentum,
                 norm_decay=norm_decay,
                 norm_decay=norm_decay,
                 freeze_norm=freeze_norm,
                 freeze_norm=freeze_norm,
                 act=None,
                 act=None,
@@ -394,6 +412,7 @@ class Stage(nn.Layer):
                  num_modules,
                  num_modules,
                  num_filters,
                  num_filters,
                  has_se=False,
                  has_se=False,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  multi_scale_output=True,
                  multi_scale_output=True,
@@ -410,6 +429,7 @@ class Stage(nn.Layer):
                         num_channels=num_channels,
                         num_channels=num_channels,
                         num_filters=num_filters,
                         num_filters=num_filters,
                         has_se=has_se,
                         has_se=has_se,
+                        norm_momentum=norm_momentum,
                         norm_decay=norm_decay,
                         norm_decay=norm_decay,
                         freeze_norm=freeze_norm,
                         freeze_norm=freeze_norm,
                         multi_scale_output=False,
                         multi_scale_output=False,
@@ -421,6 +441,7 @@ class Stage(nn.Layer):
                         num_channels=num_channels,
                         num_channels=num_channels,
                         num_filters=num_filters,
                         num_filters=num_filters,
                         has_se=has_se,
                         has_se=has_se,
+                        norm_momentum=norm_momentum,
                         norm_decay=norm_decay,
                         norm_decay=norm_decay,
                         freeze_norm=freeze_norm,
                         freeze_norm=freeze_norm,
                         name=name + '_' + str(i + 1)))
                         name=name + '_' + str(i + 1)))
@@ -440,6 +461,7 @@ class HighResolutionModule(nn.Layer):
                  num_filters,
                  num_filters,
                  has_se=False,
                  has_se=False,
                  multi_scale_output=True,
                  multi_scale_output=True,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -449,6 +471,7 @@ class HighResolutionModule(nn.Layer):
             in_channels=num_channels,
             in_channels=num_channels,
             out_channels=num_filters,
             out_channels=num_filters,
             has_se=has_se,
             has_se=has_se,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name=name)
             name=name)
@@ -457,6 +480,7 @@ class HighResolutionModule(nn.Layer):
             in_channels=num_filters,
             in_channels=num_filters,
             out_channels=num_filters,
             out_channels=num_filters,
             multi_scale_output=multi_scale_output,
             multi_scale_output=multi_scale_output,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name=name)
             name=name)
@@ -472,6 +496,7 @@ class FuseLayers(nn.Layer):
                  in_channels,
                  in_channels,
                  out_channels,
                  out_channels,
                  multi_scale_output=True,
                  multi_scale_output=True,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  freeze_norm=True,
                  freeze_norm=True,
                  name=None):
                  name=None):
@@ -493,6 +518,7 @@ class FuseLayers(nn.Layer):
                             filter_size=1,
                             filter_size=1,
                             stride=1,
                             stride=1,
                             act=None,
                             act=None,
+                            norm_momentum=norm_momentum,
                             norm_decay=norm_decay,
                             norm_decay=norm_decay,
                             freeze_norm=freeze_norm,
                             freeze_norm=freeze_norm,
                             name=name + '_layer_' + str(i + 1) + '_' +
                             name=name + '_layer_' + str(i + 1) + '_' +
@@ -510,6 +536,7 @@ class FuseLayers(nn.Layer):
                                     ch_out=out_channels[i],
                                     ch_out=out_channels[i],
                                     filter_size=3,
                                     filter_size=3,
                                     stride=2,
                                     stride=2,
+                                    norm_momentum=norm_momentum,
                                     norm_decay=norm_decay,
                                     norm_decay=norm_decay,
                                     freeze_norm=freeze_norm,
                                     freeze_norm=freeze_norm,
                                     act=None,
                                     act=None,
@@ -525,6 +552,7 @@ class FuseLayers(nn.Layer):
                                     ch_out=out_channels[j],
                                     ch_out=out_channels[j],
                                     filter_size=3,
                                     filter_size=3,
                                     stride=2,
                                     stride=2,
+                                    norm_momentum=norm_momentum,
                                     norm_decay=norm_decay,
                                     norm_decay=norm_decay,
                                     freeze_norm=freeze_norm,
                                     freeze_norm=freeze_norm,
                                     act="relu",
                                     act="relu",
@@ -549,7 +577,6 @@ class FuseLayers(nn.Layer):
                     for k in range(i - j):
                     for k in range(i - j):
                         y = self.residual_func_list[residual_func_idx](y)
                         y = self.residual_func_list[residual_func_idx](y)
                         residual_func_idx += 1
                         residual_func_idx += 1
-
                     residual = paddle.add(x=residual, y=y)
                     residual = paddle.add(x=residual, y=y)
             residual = F.relu(residual)
             residual = F.relu(residual)
             outs.append(residual)
             outs.append(residual)
@@ -567,6 +594,7 @@ class HRNet(nn.Layer):
         has_se (bool): whether to add SE block for each stage
         has_se (bool): whether to add SE block for each stage
         freeze_at (int): the stage to freeze
         freeze_at (int): the stage to freeze
         freeze_norm (bool): whether to freeze norm in HRNet
         freeze_norm (bool): whether to freeze norm in HRNet
+        norm_momentum (float): momentum of BatchNorm
         norm_decay (float): weight decay for normalization layer weights
         norm_decay (float): weight decay for normalization layer weights
         return_idx (List): the stage to return
         return_idx (List): the stage to return
         upsample (bool): whether to upsample and concat the backbone feats
         upsample (bool): whether to upsample and concat the backbone feats
@@ -577,9 +605,11 @@ class HRNet(nn.Layer):
                  has_se=False,
                  has_se=False,
                  freeze_at=0,
                  freeze_at=0,
                  freeze_norm=True,
                  freeze_norm=True,
+                 norm_momentum=0.9,
                  norm_decay=0.,
                  norm_decay=0.,
                  return_idx=[0, 1, 2, 3],
                  return_idx=[0, 1, 2, 3],
-                 upsample=False):
+                 upsample=False,
+                 downsample=False):
         super(HRNet, self).__init__()
         super(HRNet, self).__init__()
 
 
         self.width = width
         self.width = width
@@ -591,6 +621,7 @@ class HRNet(nn.Layer):
         self.freeze_at = freeze_at
         self.freeze_at = freeze_at
         self.return_idx = return_idx
         self.return_idx = return_idx
         self.upsample = upsample
         self.upsample = upsample
+        self.downsample = downsample
 
 
         self.channels = {
         self.channels = {
             18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
             18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
@@ -613,6 +644,7 @@ class HRNet(nn.Layer):
             ch_out=64,
             ch_out=64,
             filter_size=3,
             filter_size=3,
             stride=2,
             stride=2,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             act='relu',
             act='relu',
@@ -623,6 +655,7 @@ class HRNet(nn.Layer):
             ch_out=64,
             ch_out=64,
             filter_size=3,
             filter_size=3,
             stride=2,
             stride=2,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             act='relu',
             act='relu',
@@ -631,6 +664,7 @@ class HRNet(nn.Layer):
         self.la1 = Layer1(
         self.la1 = Layer1(
             num_channels=64,
             num_channels=64,
             has_se=has_se,
             has_se=has_se,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="layer2")
             name="layer2")
@@ -638,6 +672,7 @@ class HRNet(nn.Layer):
         self.tr1 = TransitionLayer(
         self.tr1 = TransitionLayer(
             in_channels=[256],
             in_channels=[256],
             out_channels=channels_2,
             out_channels=channels_2,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="tr1")
             name="tr1")
@@ -647,6 +682,7 @@ class HRNet(nn.Layer):
             num_modules=num_modules_2,
             num_modules=num_modules_2,
             num_filters=channels_2,
             num_filters=channels_2,
             has_se=self.has_se,
             has_se=self.has_se,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="st2")
             name="st2")
@@ -654,6 +690,7 @@ class HRNet(nn.Layer):
         self.tr2 = TransitionLayer(
         self.tr2 = TransitionLayer(
             in_channels=channels_2,
             in_channels=channels_2,
             out_channels=channels_3,
             out_channels=channels_3,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="tr2")
             name="tr2")
@@ -663,6 +700,7 @@ class HRNet(nn.Layer):
             num_modules=num_modules_3,
             num_modules=num_modules_3,
             num_filters=channels_3,
             num_filters=channels_3,
             has_se=self.has_se,
             has_se=self.has_se,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="st3")
             name="st3")
@@ -670,6 +708,7 @@ class HRNet(nn.Layer):
         self.tr3 = TransitionLayer(
         self.tr3 = TransitionLayer(
             in_channels=channels_3,
             in_channels=channels_3,
             out_channels=channels_4,
             out_channels=channels_4,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             name="tr3")
             name="tr3")
@@ -678,11 +717,106 @@ class HRNet(nn.Layer):
             num_modules=num_modules_4,
             num_modules=num_modules_4,
             num_filters=channels_4,
             num_filters=channels_4,
             has_se=self.has_se,
             has_se=self.has_se,
+            norm_momentum=norm_momentum,
             norm_decay=norm_decay,
             norm_decay=norm_decay,
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             multi_scale_output=len(return_idx) > 1,
             multi_scale_output=len(return_idx) > 1,
             name="st4")
             name="st4")
 
 
+        if self.downsample:
+            self.incre_modules, self.downsamp_modules, \
+                self.final_layer = self._make_head(channels_4, norm_momentum=norm_momentum, has_se=self.has_se)
+
+    def _make_layer(self,
+                    block,
+                    inplanes,
+                    planes,
+                    blocks,
+                    stride=1,
+                    norm_momentum=0.9,
+                    has_se=False,
+                    name=None):
+        downsample = None
+        if stride != 1 or inplanes != planes * 4:
+            downsample = True
+
+        layers = []
+        layers.append(
+            block(
+                inplanes,
+                planes,
+                has_se,
+                stride,
+                downsample,
+                norm_momentum=norm_momentum,
+                freeze_norm=False,
+                name=name + "_s0"))
+        inplanes = planes * 4
+        for i in range(1, blocks):
+            layers.append(
+                block(
+                    inplanes,
+                    planes,
+                    has_se,
+                    norm_momentum=norm_momentum,
+                    freeze_norm=False,
+                    name=name + "_s" + str(i)))
+
+        return nn.Sequential(*layers)
+
+    def _make_head(self, pre_stage_channels, norm_momentum=0.9, has_se=False):
+        head_block = BottleneckBlock
+        head_channels = [32, 64, 128, 256]
+
+        # Increase the number of channels at each resolution
+        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
+        incre_modules = []
+        for i, channels in enumerate(pre_stage_channels):
+            incre_module = self._make_layer(
+                head_block,
+                channels,
+                head_channels[i],
+                1,
+                stride=1,
+                norm_momentum=norm_momentum,
+                has_se=has_se,
+                name='incre' + str(i))
+            incre_modules.append(incre_module)
+        incre_modules = nn.LayerList(incre_modules)
+
+        # downsampling modules
+        downsamp_modules = []
+        for i in range(len(pre_stage_channels) - 1):
+            in_channels = head_channels[i] * 4
+            out_channels = head_channels[i + 1] * 4
+
+            downsamp_module = nn.Sequential(
+                nn.Conv2D(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1),
+                nn.BatchNorm2D(
+                    out_channels, momentum=norm_momentum),
+                nn.ReLU())
+
+            downsamp_modules.append(downsamp_module)
+        downsamp_modules = nn.LayerList(downsamp_modules)
+
+        final_layer = nn.Sequential(
+            nn.Conv2D(
+                in_channels=head_channels[3] * 4,
+                out_channels=2048,
+                kernel_size=1,
+                stride=1,
+                padding=0),
+            nn.BatchNorm2D(
+                2048, momentum=norm_momentum),
+            nn.ReLU())
+
+        return incre_modules, downsamp_modules, final_layer
+
     def forward(self, inputs):
     def forward(self, inputs):
         x = inputs['image']
         x = inputs['image']
         conv1 = self.conv_layer1_1(x)
         conv1 = self.conv_layer1_1(x)
@@ -707,6 +841,14 @@ class HRNet(nn.Layer):
             x = paddle.concat([st4[0], x1, x2, x3], 1)
             x = paddle.concat([st4[0], x1, x2, x3], 1)
             return x
             return x
 
 
+        if self.downsample:
+            y = self.incre_modules[0](st4[0])
+            for i in range(len(self.downsamp_modules)):
+                y = self.incre_modules[i+1](st4[i+1]) + \
+                            self.downsamp_modules[i](y)
+            y = self.final_layer(y)
+            return y
+
         res = []
         res = []
         for i, layer in enumerate(st4):
         for i, layer in enumerate(st4):
             if i == self.freeze_at:
             if i == self.freeze_at:
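
With downsample=True, HRNet gains the classification-style head from the original paper: each of the four st4 branches is first widened by a BottleneckBlock stage (to 128, 256, 512 and 1024 channels after the 4x expansion), the branches are then merged top-down with strided 3x3 convolutions, and a final 1x1 convolution lifts the result to 2048 channels. A shape-only sketch of that flow, using bare Conv2D layers and assumed HRNetV2-W18 branch shapes rather than the real ppdet blocks:

    import paddle
    import paddle.nn as nn

    branches = [paddle.rand([1, c, 64 // s, 64 // s])
                for c, s in zip([18, 36, 72, 144], [1, 2, 4, 8])]

    incre = nn.LayerList([nn.Conv2D(c, o, 1)
                          for c, o in zip([18, 36, 72, 144], [128, 256, 512, 1024])])
    downsamp = nn.LayerList([nn.Conv2D(i, o, 3, stride=2, padding=1)
                             for i, o in zip([128, 256, 512], [256, 512, 1024])])
    final = nn.Conv2D(1024, 2048, 1)

    y = incre[0](branches[0])
    for i in range(len(downsamp)):
        y = incre[i + 1](branches[i + 1]) + downsamp[i](y)
    y = final(y)
    print(y.shape)   # [1, 2048, 8, 8], ready for a global pool + classifier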

+ 5 - 0
paddlers/models/ppdet/modeling/backbones/lite_hrnet.py

@@ -854,6 +854,11 @@ class LiteHRNet(nn.Layer):
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         x = inputs['image']
         x = inputs['image']
+        dims = x.shape
+        if len(dims) == 5:
+            x = paddle.reshape(x, (dims[0] * dims[1], dims[2], dims[3],
+                                   dims[4]))  # [6, 3, 128, 96]
+
         x = self.stem(x)
         x = self.stem(x)
         y_list = [x]
         y_list = [x]
         for stage_idx in range(3):
         for stage_idx in range(3):
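
The new lines let LiteHRNet accept a 5-D batch (for example several crops or frames per sample) by folding the extra leading dimension into the batch axis before the stem; the [6, 3, 128, 96] comment matches a 2 x 3 batch of 128 x 96 crops. A minimal sketch of just that reshape:

    import paddle

    x = paddle.rand([2, 3, 3, 128, 96])      # [B, T, C, H, W]
    dims = x.shape
    if len(dims) == 5:
        x = paddle.reshape(x, (dims[0] * dims[1], dims[2], dims[3], dims[4]))
    print(x.shape)                            # [6, 3, 128, 96]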

+ 30 - 30
paddlers/models/ppdet/modeling/backbones/resnet.py

@@ -285,36 +285,6 @@ class BottleNeck(nn.Layer):
         # ResNeXt
         # ResNeXt
         width = int(ch_out * (base_width / 64.)) * groups
         width = int(ch_out * (base_width / 64.)) * groups
 
 
-        self.shortcut = shortcut
-        if not shortcut:
-            if variant == 'd' and stride == 2:
-                self.short = nn.Sequential()
-                self.short.add_sublayer(
-                    'pool',
-                    nn.AvgPool2D(
-                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
-                self.short.add_sublayer(
-                    'conv',
-                    ConvNormLayer(
-                        ch_in=ch_in,
-                        ch_out=ch_out * self.expansion,
-                        filter_size=1,
-                        stride=1,
-                        norm_type=norm_type,
-                        norm_decay=norm_decay,
-                        freeze_norm=freeze_norm,
-                        lr=lr))
-            else:
-                self.short = ConvNormLayer(
-                    ch_in=ch_in,
-                    ch_out=ch_out * self.expansion,
-                    filter_size=1,
-                    stride=stride,
-                    norm_type=norm_type,
-                    norm_decay=norm_decay,
-                    freeze_norm=freeze_norm,
-                    lr=lr)
-
         self.branch2a = ConvNormLayer(
         self.branch2a = ConvNormLayer(
             ch_in=ch_in,
             ch_in=ch_in,
             ch_out=width,
             ch_out=width,
@@ -351,6 +321,36 @@ class BottleNeck(nn.Layer):
             freeze_norm=freeze_norm,
             freeze_norm=freeze_norm,
             lr=lr)
             lr=lr)
 
 
+        self.shortcut = shortcut
+        if not shortcut:
+            if variant == 'd' and stride == 2:
+                self.short = nn.Sequential()
+                self.short.add_sublayer(
+                    'pool',
+                    nn.AvgPool2D(
+                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
+                self.short.add_sublayer(
+                    'conv',
+                    ConvNormLayer(
+                        ch_in=ch_in,
+                        ch_out=ch_out * self.expansion,
+                        filter_size=1,
+                        stride=1,
+                        norm_type=norm_type,
+                        norm_decay=norm_decay,
+                        freeze_norm=freeze_norm,
+                        lr=lr))
+            else:
+                self.short = ConvNormLayer(
+                    ch_in=ch_in,
+                    ch_out=ch_out * self.expansion,
+                    filter_size=1,
+                    stride=stride,
+                    norm_type=norm_type,
+                    norm_decay=norm_decay,
+                    freeze_norm=freeze_norm,
+                    lr=lr)
+
         self.std_senet = std_senet
         self.std_senet = std_senet
         if self.std_senet:
         if self.std_senet:
             self.se = SELayer(ch_out * self.expansion)
             self.se = SELayer(ch_out * self.expansion)

+ 381 - 0
paddlers/models/ppdet/modeling/backbones/trans_encoder.py

@@ -0,0 +1,381 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import ReLU, Swish, GELU
+import math
+
+from paddlers.models.ppdet.core.workspace import register
+from ..shape_spec import ShapeSpec
+
+__all__ = ['TransEncoder']
+
+
+class BertEmbeddings(nn.Layer):
+    def __init__(self, word_size, position_embeddings_size, word_type_size,
+                 hidden_size, dropout_prob):
+        super(BertEmbeddings, self).__init__()
+        self.word_embeddings = nn.Embedding(
+            word_size, hidden_size, padding_idx=0)
+        self.position_embeddings = nn.Embedding(position_embeddings_size,
+                                                hidden_size)
+        self.token_type_embeddings = nn.Embedding(word_type_size, hidden_size)
+        self.layernorm = nn.LayerNorm(hidden_size, epsilon=1e-8)
+        self.dropout = nn.Dropout(dropout_prob)
+
+    def forward(self, x, token_type_ids=None, position_ids=None):
+        seq_len = paddle.shape(x)[1]
+        if position_ids is None:
+            position_ids = paddle.arange(seq_len).unsqueeze(0).expand_as(x)
+        if token_type_ids is None:
+            token_type_ids = paddle.zeros(paddle.shape(x))
+
+        word_embs = self.word_embeddings(x)
+        position_embs = self.position_embeddings(position_ids)
+        token_type_embs = self.token_type_embeddings(token_type_ids)
+
+        embs_cmb = word_embs + position_embs + token_type_embs
+        embs_out = self.layernorm(embs_cmb)
+        embs_out = self.dropout(embs_out)
+        return embs_out
+
+
+class BertSelfAttention(nn.Layer):
+    def __init__(self,
+                 hidden_size,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 output_attentions=False):
+        super(BertSelfAttention, self).__init__()
+        if hidden_size % num_attention_heads != 0:
+            raise ValueError(
+                "The hidden_size must be a multiple of the number of attention "
+                "heads, but got {} % {} != 0".format(
+                    hidden_size, num_attention_heads))
+
+        self.num_attention_heads = num_attention_heads
+        self.attention_head_size = int(hidden_size / num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+        self.query = nn.Linear(hidden_size, self.all_head_size)
+        self.key = nn.Linear(hidden_size, self.all_head_size)
+        self.value = nn.Linear(hidden_size, self.all_head_size)
+
+        self.dropout = nn.Dropout(attention_probs_dropout_prob)
+        self.output_attentions = output_attentions
+
+    def forward(self, x, attention_mask, head_mask=None):
+        query = self.query(x)
+        key = self.key(x)
+        value = self.value(x)
+
+        query_dim1, query_dim2 = paddle.shape(query)[:-1]
+        new_shape = [
+            query_dim1, query_dim2, self.num_attention_heads,
+            self.attention_head_size
+        ]
+        query = query.reshape(new_shape).transpose(perm=(0, 2, 1, 3))
+        key = key.reshape(new_shape).transpose(perm=(0, 2, 3, 1))
+        value = value.reshape(new_shape).transpose(perm=(0, 2, 1, 3))
+
+        attention = paddle.matmul(query,
+                                  key) / math.sqrt(self.attention_head_size)
+        attention = attention + attention_mask
+        attention_value = F.softmax(attention, axis=-1)
+        attention_value = self.dropout(attention_value)
+
+        if head_mask is not None:
+            attention_value = attention_value * head_mask
+
+        context = paddle.matmul(attention_value, value).transpose(perm=(0, 2, 1,
+                                                                        3))
+        ctx_dim1, ctx_dim2 = paddle.shape(context)[:-2]
+        new_context_shape = [
+            ctx_dim1,
+            ctx_dim2,
+            self.all_head_size,
+        ]
+        context = context.reshape(new_context_shape)
+
+        if self.output_attentions:
+            return (context, attention_value)
+        else:
+            return (context, )
+
+
+class BertAttention(nn.Layer):
+    def __init__(self,
+                 hidden_size,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 fc_dropout_prob,
+                 output_attentions=False):
+        super(BertAttention, self).__init__()
+        self.bert_selfattention = BertSelfAttention(
+            hidden_size, num_attention_heads, attention_probs_dropout_prob,
+            output_attentions)
+        self.fc = nn.Linear(hidden_size, hidden_size)
+        self.layernorm = nn.LayerNorm(hidden_size, epsilon=1e-8)
+        self.dropout = nn.Dropout(fc_dropout_prob)
+
+    def forward(self, x, attention_mask, head_mask=None):
+        attention_feats = self.bert_selfattention(x, attention_mask, head_mask)
+        features = self.fc(attention_feats[0])
+        features = self.dropout(features)
+        features = self.layernorm(features + x)
+        if len(attention_feats) == 2:
+            return (features, attention_feats[1])
+        else:
+            return (features, )
+
+
+class BertFeedForward(nn.Layer):
+    def __init__(self,
+                 hidden_size,
+                 intermediate_size,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 fc_dropout_prob,
+                 act_fn='ReLU',
+                 output_attentions=False):
+        super(BertFeedForward, self).__init__()
+        self.fc1 = nn.Linear(hidden_size, intermediate_size)
+        self.act_fn = eval(act_fn)
+        self.fc2 = nn.Linear(intermediate_size, hidden_size)
+        self.layernorm = nn.LayerNorm(hidden_size, epsilon=1e-8)
+        self.dropout = nn.Dropout(fc_dropout_prob)
+
+    def forward(self, x):
+        features = self.fc1(x)
+        features = self.act_fn(features)
+        features = self.fc2(features)
+        features = self.dropout(features)
+        features = self.layernorm(features + x)
+        return features
+
+
+class BertLayer(nn.Layer):
+    def __init__(self,
+                 hidden_size,
+                 intermediate_size,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 fc_dropout_prob,
+                 act_fn='ReLU',
+                 output_attentions=False):
+        super(BertLayer, self).__init__()
+        self.attention = BertAttention(hidden_size, num_attention_heads,
+                                       attention_probs_dropout_prob,
+                                       output_attentions)
+        self.feed_forward = BertFeedForward(
+            hidden_size, intermediate_size, num_attention_heads,
+            attention_probs_dropout_prob, fc_dropout_prob, act_fn,
+            output_attentions)
+
+    def forward(self, x, attention_mask, head_mask=None):
+        attention_feats = self.attention(x, attention_mask, head_mask)
+        features = self.feed_forward(attention_feats[0])
+        if len(attention_feats) == 2:
+            return (features, attention_feats[1])
+        else:
+            return (features, )
+
+
+class BertEncoder(nn.Layer):
+    def __init__(self,
+                 num_hidden_layers,
+                 hidden_size,
+                 intermediate_size,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 fc_dropout_prob,
+                 act_fn='ReLU',
+                 output_attentions=False,
+                 output_hidden_feats=False):
+        super(BertEncoder, self).__init__()
+        self.output_attentions = output_attentions
+        self.output_hidden_feats = output_hidden_feats
+        self.layers = nn.LayerList([
+            BertLayer(hidden_size, intermediate_size, num_attention_heads,
+                      attention_probs_dropout_prob, fc_dropout_prob, act_fn,
+                      output_attentions) for _ in range(num_hidden_layers)
+        ])
+
+    def forward(self, x, attention_mask, head_mask=None):
+        all_features = (x, )
+        all_attentions = ()
+
+        for i, layer in enumerate(self.layers):
+            mask = head_mask[i] if head_mask is not None else None
+            layer_out = layer(x, attention_mask, mask)
+
+            if self.output_hidden_feats:
+                all_features = all_features + (x, )
+            x = layer_out[0]
+            if self.output_attentions:
+                all_attentions = all_attentions + (layer_out[1], )
+
+        outputs = (x, )
+        if self.output_hidden_feats:
+            outputs += (all_features, )
+        if self.output_attentions:
+            outputs += (all_attentions, )
+        return outputs
+
+
+class BertPooler(nn.Layer):
+    def __init__(self, hidden_size):
+        super(BertPooler, self).__init__()
+        self.fc = nn.Linear(hidden_size, hidden_size)
+        self.act = nn.Tanh()
+
+    def forward(self, x):
+        first_token = x[:, 0]
+        pooled_output = self.fc(first_token)
+        pooled_output = self.act(pooled_output)
+        return pooled_output
+
+
+class METROEncoder(nn.Layer):
+    def __init__(self,
+                 vocab_size,
+                 num_hidden_layers,
+                 features_dims,
+                 position_embeddings_size,
+                 hidden_size,
+                 intermediate_size,
+                 output_feature_dim,
+                 num_attention_heads,
+                 attention_probs_dropout_prob,
+                 fc_dropout_prob,
+                 act_fn='ReLU',
+                 output_attentions=False,
+                 output_hidden_feats=False,
+                 use_img_layernorm=False):
+        super(METROEncoder, self).__init__()
+        self.img_dims = features_dims
+        self.num_hidden_layers = num_hidden_layers
+        self.use_img_layernorm = use_img_layernorm
+        self.output_attentions = output_attentions
+        self.embedding = BertEmbeddings(vocab_size, position_embeddings_size, 2,
+                                        hidden_size, fc_dropout_prob)
+        self.encoder = BertEncoder(
+            num_hidden_layers, hidden_size, intermediate_size,
+            num_attention_heads, attention_probs_dropout_prob, fc_dropout_prob,
+            act_fn, output_attentions, output_hidden_feats)
+        self.pooler = BertPooler(hidden_size)
+        self.position_embeddings = nn.Embedding(position_embeddings_size,
+                                                hidden_size)
+        self.img_embedding = nn.Linear(
+            features_dims, hidden_size, bias_attr=True)
+        self.dropout = nn.Dropout(fc_dropout_prob)
+        self.cls_head = nn.Linear(hidden_size, output_feature_dim)
+        self.residual = nn.Linear(features_dims, output_feature_dim)
+
+        self.apply(self.init_weights)
+
+    def init_weights(self, module):
+        """ Initialize the weights.
+        """
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.set_value(
+                paddle.normal(
+                    mean=0.0, std=0.02, shape=module.weight.shape))
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.set_value(paddle.zeros(shape=module.bias.shape))
+            module.weight.set_value(
+                paddle.full(
+                    shape=module.weight.shape, fill_value=1.0))
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            module.bias.set_value(paddle.zeros(shape=module.bias.shape))
+
+    def forward(self, x):
+        batchsize, seq_len = paddle.shape(x)[:2]
+        input_ids = paddle.zeros((batchsize, seq_len), dtype="int64")
+        position_ids = paddle.arange(
+            seq_len, dtype="int64").unsqueeze(0).expand_as(input_ids)
+
+        attention_mask = paddle.ones_like(input_ids).unsqueeze(1).unsqueeze(2)
+        head_mask = [None] * self.num_hidden_layers
+
+        position_embs = self.position_embeddings(position_ids)
+        attention_mask = (1.0 - attention_mask) * -10000.0
+
+        img_features = self.img_embedding(x)
+
+        # We empirically observe that adding an additional learnable position embedding leads to more stable training
+        embeddings = position_embs + img_features
+        if self.use_img_layernorm:
+            embeddings = self.layernorm(embeddings)
+        embeddings = self.dropout(embeddings)
+
+        encoder_outputs = self.encoder(
+            embeddings, attention_mask, head_mask=head_mask)
+
+        pred_score = self.cls_head(encoder_outputs[0])
+        res_img_feats = self.residual(x)
+        pred_score = pred_score + res_img_feats
+
+        if self.output_attentions and self.output_hidden_feats:
+            return pred_score, encoder_outputs[1], encoder_outputs[-1]
+        else:
+            return pred_score
+
+
+def gelu(x):
+    """Implementation of the gelu activation function.
+        https://arxiv.org/abs/1606.08415
+    """
+    return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0)))
+
+
+@register
+class TransEncoder(nn.Layer):
+    def __init__(self,
+                 vocab_size=30522,
+                 num_hidden_layers=4,
+                 num_attention_heads=4,
+                 position_embeddings_size=512,
+                 intermediate_size=3072,
+                 input_feat_dim=[2048, 512, 128],
+                 hidden_feat_dim=[1024, 256, 128],
+                 attention_probs_dropout_prob=0.1,
+                 fc_dropout_prob=0.1,
+                 act_fn='gelu',
+                 output_attentions=False,
+                 output_hidden_feats=False):
+        super(TransEncoder, self).__init__()
+        output_feat_dim = input_feat_dim[1:] + [3]
+        trans_encoder = []
+        for i in range(len(output_feat_dim)):
+            features_dims = input_feat_dim[i]
+            output_feature_dim = output_feat_dim[i]
+            hidden_size = hidden_feat_dim[i]
+
+            # init a transformer encoder and append it to a list
+            assert hidden_size % num_attention_heads == 0
+            model = METROEncoder(vocab_size, num_hidden_layers, features_dims,
+                                 position_embeddings_size, hidden_size,
+                                 intermediate_size, output_feature_dim,
+                                 num_attention_heads,
+                                 attention_probs_dropout_prob, fc_dropout_prob,
+                                 act_fn, output_attentions, output_hidden_feats)
+            trans_encoder.append(model)
+        self.trans_encoder = paddle.nn.Sequential(*trans_encoder)
+
+    def forward(self, x):
+        out = self.trans_encoder(x)
+        return out
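
METROEncoder builds an all-ones attention mask and turns it into an additive bias with (1.0 - mask) * -10000.0, the usual BERT-style trick: positions to be ignored receive a large negative logit so their softmax weight collapses to ~0. A toy illustration of the trick, with made-up shapes rather than the ones the backbone actually sees:

    import paddle
    import paddle.nn.functional as F

    scores = paddle.rand([1, 4, 4])               # [batch, query, key] attention logits
    mask = paddle.to_tensor([[1., 1., 1., 0.]])   # last key position is padding
    bias = (1.0 - mask) * -10000.0                # 0 for real tokens, -10000 for padding
    weights = F.softmax(scores + bias.unsqueeze(1), axis=-1)
    print(weights[0, 0])                          # last entry is numerically zero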

+ 29 - 11
paddlers/models/ppdet/modeling/backbones/vision_transformer.py

@@ -284,9 +284,9 @@ class RelativePositionBias(nn.Layer):
 
 
     def forward(self):
     def forward(self):
         relative_position_bias = \
         relative_position_bias = \
-            self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
+            self.relative_position_bias_table[self.relative_position_index.reshape([-1])].reshape([
                  self.window_size[0] * self.window_size[1] + 1,
                  self.window_size[0] * self.window_size[1] + 1,
-                 self.window_size[0] * self.window_size[1] + 1, -1)  # Wh*Ww,Wh*Ww,nH 
+                 self.window_size[0] * self.window_size[1] + 1, -1])  # Wh*Ww,Wh*Ww,nH 
         return relative_position_bias.transpose((2, 0, 1))  # nH, Wh*Ww, Wh*Ww
         return relative_position_bias.transpose((2, 0, 1))  # nH, Wh*Ww, Wh*Ww
 
 
 
 
@@ -340,6 +340,7 @@ class VisionTransformer(nn.Layer):
                  use_abs_pos_emb=False,
                  use_abs_pos_emb=False,
                  use_sincos_pos_emb=True,
                  use_sincos_pos_emb=True,
                  with_fpn=True,
                  with_fpn=True,
+                 num_fpn_levels=4,
                  use_checkpoint=False,
                  use_checkpoint=False,
                  **args):
                  **args):
         super().__init__()
         super().__init__()
@@ -350,6 +351,8 @@ class VisionTransformer(nn.Layer):
         self.use_sincos_pos_emb = use_sincos_pos_emb
         self.use_sincos_pos_emb = use_sincos_pos_emb
         self.use_rel_pos_bias = use_rel_pos_bias
         self.use_rel_pos_bias = use_rel_pos_bias
         self.final_norm = final_norm
         self.final_norm = final_norm
+        self.out_indices = out_indices
+        self.num_fpn_levels = num_fpn_levels
 
 
         if use_checkpoint:
         if use_checkpoint:
             paddle.seed(0)
             paddle.seed(0)
@@ -415,14 +418,15 @@ class VisionTransformer(nn.Layer):
 
 
         assert len(out_indices) <= 4, ''
         assert len(out_indices) <= 4, ''
         self.out_indices = out_indices
         self.out_indices = out_indices
-        self.out_channels = [embed_dim for _ in range(len(out_indices))]
-        self.out_strides = [4, 8, 16, 32][-len(out_indices):] if with_fpn else [
-            8 for _ in range(len(out_indices))
+        self.out_channels = [embed_dim for _ in range(num_fpn_levels)]
+        self.out_strides = [4, 8, 16, 32][-num_fpn_levels:] if with_fpn else [
+            patch_size for _ in range(len(out_indices))
         ]
         ]
 
 
         self.norm = Identity()
         self.norm = Identity()
 
 
         if self.with_fpn:
         if self.with_fpn:
+            assert num_fpn_levels <= 4, ''
             self.init_fpn(
             self.init_fpn(
                 embed_dim=embed_dim,
                 embed_dim=embed_dim,
                 patch_size=patch_size, )
                 patch_size=patch_size, )
@@ -505,16 +509,24 @@ class VisionTransformer(nn.Layer):
         dim = x.shape[-1]
         dim = x.shape[-1]
         # we add a small number to avoid floating point error in the interpolation
         # we add a small number to avoid floating point error in the interpolation
         # see discussion at https://github.com/facebookresearch/dino/issues/8
         # see discussion at https://github.com/facebookresearch/dino/issues/8
-        w0, h0 = w0 + 0.1, h0 + 0.1
+        # w0, h0 = w0 + 0.1, h0 + 0.1
+        # patch_pos_embed = nn.functional.interpolate(
+        #     patch_pos_embed.reshape([
+        #         1, self.patch_embed.num_patches_w,
+        #         self.patch_embed.num_patches_h, dim
+        #     ]).transpose((0, 3, 1, 2)),
+        #     scale_factor=(w0 / self.patch_embed.num_patches_w,
+        #                   h0 / self.patch_embed.num_patches_h),
+        #     mode='bicubic', )
 
 
         patch_pos_embed = nn.functional.interpolate(
         patch_pos_embed = nn.functional.interpolate(
             patch_pos_embed.reshape([
             patch_pos_embed.reshape([
                 1, self.patch_embed.num_patches_w,
                 1, self.patch_embed.num_patches_w,
                 self.patch_embed.num_patches_h, dim
                 self.patch_embed.num_patches_h, dim
             ]).transpose((0, 3, 1, 2)),
             ]).transpose((0, 3, 1, 2)),
-            scale_factor=(w0 / self.patch_embed.num_patches_w,
-                          h0 / self.patch_embed.num_patches_h),
+            (w0, h0),
             mode='bicubic', )
             mode='bicubic', )
+
         assert int(w0) == patch_pos_embed.shape[-2] and int(
         assert int(w0) == patch_pos_embed.shape[-2] and int(
             h0) == patch_pos_embed.shape[-1]
             h0) == patch_pos_embed.shape[-1]
         patch_pos_embed = patch_pos_embed.transpose(
         patch_pos_embed = patch_pos_embed.transpose(
@@ -611,9 +623,15 @@ class VisionTransformer(nn.Layer):
                 feats.append(xp)
                 feats.append(xp)
 
 
         if self.with_fpn:
         if self.with_fpn:
-            fpns = [self.fpn1, self.fpn2, self.fpn3, self.fpn4]
-            for i in range(len(feats)):
-                feats[i] = fpns[i](feats[i])
+            fpns = [self.fpn1, self.fpn2, self.fpn3, self.fpn4][
+                -self.num_fpn_levels:]
+            assert len(fpns) == len(feats) or len(feats) == 1, ''
+            outputs = []
+            for i, m in enumerate(fpns):
+                outputs.append(
+                    m(feats[i] if len(feats) == len(fpns) else feats[-1]))
+
+            return outputs
 
 
         return feats
         return feats
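
With the new num_fpn_levels argument, only the last N of the four FPN necks are applied, either one per multi-scale feature or all to the single final feature map when out_indices yields just one. A minimal sketch of that selection logic, with plain functions standing in for the real fpn1..fpn4 necks:

    fpns = [lambda x, k=k: "fpn{}({})".format(k, x) for k in range(1, 5)]

    def select_and_apply(feats, num_fpn_levels):
        active = fpns[-num_fpn_levels:]                    # keep the last N necks
        assert len(active) == len(feats) or len(feats) == 1
        return [m(feats[i] if len(feats) == len(active) else feats[-1])
                for i, m in enumerate(active)]

    print(select_and_apply(["x"], num_fpn_levels=2))       # one feat fed to both necks
    print(select_and_apply(["a", "b"], num_fpn_levels=2))  # one feat per neck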
 
 

+ 99 - 94
paddlers/models/ppdet/modeling/bbox_utils.py

@@ -17,7 +17,9 @@ import paddle
 import numpy as np
 import numpy as np
 
 
 
 
-def bbox2delta(src_boxes, tgt_boxes, weights):
+def bbox2delta(src_boxes, tgt_boxes, weights=[1.0, 1.0, 1.0, 1.0]):
+    """Encode bboxes to deltas.
+    """
     src_w = src_boxes[:, 2] - src_boxes[:, 0]
     src_w = src_boxes[:, 2] - src_boxes[:, 0]
     src_h = src_boxes[:, 3] - src_boxes[:, 1]
     src_h = src_boxes[:, 3] - src_boxes[:, 1]
     src_ctr_x = src_boxes[:, 0] + 0.5 * src_w
     src_ctr_x = src_boxes[:, 0] + 0.5 * src_w
@@ -38,7 +40,11 @@ def bbox2delta(src_boxes, tgt_boxes, weights):
     return deltas
     return deltas
 
 
 
 
-def delta2bbox(deltas, boxes, weights):
+def delta2bbox(deltas, boxes, weights=[1.0, 1.0, 1.0, 1.0], max_shape=None):
+    """Decode deltas to boxes. Used in RCNNBox, CascadeHead, RCNNHead and RetinaHead.
+    Note: the returned tensor has shape [n, 1, 4].
+        If a reshape is needed, add it after the calling code instead of here.
+    """
     clip_scale = math.log(1000.0 / 16)
     clip_scale = math.log(1000.0 / 16)
 
 
     widths = boxes[:, 2] - boxes[:, 0]
     widths = boxes[:, 2] - boxes[:, 0]
@@ -67,6 +73,96 @@ def delta2bbox(deltas, boxes, weights):
     pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
     pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
     pred_boxes = paddle.stack(pred_boxes, axis=-1)
     pred_boxes = paddle.stack(pred_boxes, axis=-1)
 
 
+    if max_shape is not None:
+        pred_boxes[..., 0::2] = pred_boxes[..., 0::2].clip(
+            min=0, max=max_shape[1])
+        pred_boxes[..., 1::2] = pred_boxes[..., 1::2].clip(
+            min=0, max=max_shape[0])
+    return pred_boxes
+
+
+def bbox2delta_v2(src_boxes,
+                  tgt_boxes,
+                  delta_mean=[0.0, 0.0, 0.0, 0.0],
+                  delta_std=[1.0, 1.0, 1.0, 1.0]):
+    """Encode bboxes to deltas.
+    Modified from bbox2delta(), which only scales deltas with weight parameters;
+    this version normalizes them with delta_mean and delta_std.
+    """
+    src_w = src_boxes[:, 2] - src_boxes[:, 0]
+    src_h = src_boxes[:, 3] - src_boxes[:, 1]
+    src_ctr_x = src_boxes[:, 0] + 0.5 * src_w
+    src_ctr_y = src_boxes[:, 1] + 0.5 * src_h
+
+    tgt_w = tgt_boxes[:, 2] - tgt_boxes[:, 0]
+    tgt_h = tgt_boxes[:, 3] - tgt_boxes[:, 1]
+    tgt_ctr_x = tgt_boxes[:, 0] + 0.5 * tgt_w
+    tgt_ctr_y = tgt_boxes[:, 1] + 0.5 * tgt_h
+
+    dx = (tgt_ctr_x - src_ctr_x) / src_w
+    dy = (tgt_ctr_y - src_ctr_y) / src_h
+    dw = paddle.log(tgt_w / src_w)
+    dh = paddle.log(tgt_h / src_h)
+
+    deltas = paddle.stack((dx, dy, dw, dh), axis=1)
+    deltas = (
+        deltas - paddle.to_tensor(delta_mean)) / paddle.to_tensor(delta_std)
+    return deltas
+
+
+def delta2bbox_v2(deltas,
+                  boxes,
+                  delta_mean=[0.0, 0.0, 0.0, 0.0],
+                  delta_std=[1.0, 1.0, 1.0, 1.0],
+                  max_shape=None,
+                  ctr_clip=32.0):
+    """Decode deltas to bboxes.
+    Modified from delta2bbox(), which only rescales deltas with weight parameters;
+    this version normalizes them with delta_mean and delta_std. Used in YOLOFHead.
+    Note: the returned tensor has shape [n, 1, 4].
+        If a reshape is needed, add it after the calling code instead of here.
+    """
+    clip_scale = math.log(1000.0 / 16)
+
+    widths = boxes[:, 2] - boxes[:, 0]
+    heights = boxes[:, 3] - boxes[:, 1]
+    ctr_x = boxes[:, 0] + 0.5 * widths
+    ctr_y = boxes[:, 1] + 0.5 * heights
+
+    deltas = deltas * paddle.to_tensor(delta_std) + paddle.to_tensor(delta_mean)
+    dx = deltas[:, 0::4]
+    dy = deltas[:, 1::4]
+    dw = deltas[:, 2::4]
+    dh = deltas[:, 3::4]
+
+    # Prevent sending too large values into paddle.exp()
+    dx = dx * widths.unsqueeze(1)
+    dy = dy * heights.unsqueeze(1)
+    if ctr_clip is not None:
+        dx = paddle.clip(dx, max=ctr_clip, min=-ctr_clip)
+        dy = paddle.clip(dy, max=ctr_clip, min=-ctr_clip)
+        dw = paddle.clip(dw, max=clip_scale)
+        dh = paddle.clip(dh, max=clip_scale)
+    else:
+        dw = dw.clip(min=-clip_scale, max=clip_scale)
+        dh = dh.clip(min=-clip_scale, max=clip_scale)
+
+    pred_ctr_x = dx + ctr_x.unsqueeze(1)
+    pred_ctr_y = dy + ctr_y.unsqueeze(1)
+    pred_w = paddle.exp(dw) * widths.unsqueeze(1)
+    pred_h = paddle.exp(dh) * heights.unsqueeze(1)
+
+    pred_boxes = []
+    pred_boxes.append(pred_ctr_x - 0.5 * pred_w)
+    pred_boxes.append(pred_ctr_y - 0.5 * pred_h)
+    pred_boxes.append(pred_ctr_x + 0.5 * pred_w)
+    pred_boxes.append(pred_ctr_y + 0.5 * pred_h)
+    pred_boxes = paddle.stack(pred_boxes, axis=-1)
+
+    if max_shape is not None:
+        pred_boxes[..., 0::2] = pred_boxes[..., 0::2].clip(
+            min=0, max=max_shape[1])
+        pred_boxes[..., 1::2] = pred_boxes[..., 1::2].clip(
+            min=0, max=max_shape[0])
     return pred_boxes
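The new bbox2delta_v2 / delta2bbox_v2 pair normalizes the deltas with delta_mean / delta_std instead of the old per-coordinate weights, and the decoder clips both the center shift (ctr_clip) and the log-scale terms before exponentiation. A minimal round-trip sketch, assuming paddle is installed and the two functions are imported from this bbox_utils module (the box coordinates are made up for illustration):

import paddle
from paddlers.models.ppdet.modeling.bbox_utils import bbox2delta_v2, delta2bbox_v2

# anchors and matched targets in x1, y1, x2, y2 form (illustrative values)
anchors = paddle.to_tensor([[10., 10., 50., 60.], [20., 30., 80., 90.]])
targets = paddle.to_tensor([[12., 14., 48., 66.], [25., 28., 70., 95.]])

deltas = bbox2delta_v2(anchors, targets)               # shape [2, 4]
decoded = delta2bbox_v2(deltas, anchors,
                        max_shape=[128, 128],          # (h, w), used only for clipping
                        ctr_clip=32.0)                 # shape [2, 1, 4] per the docstring
print(paddle.allclose(decoded.squeeze(1), targets))    # ~True when no clipping triggers

The extra [n, 1, 4] dimension comes from the 0::4 slicing in the decoder, so callers reshape after the call, as the docstring suggests.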


@@ -269,8 +365,7 @@ def decode_yolo(box, anchor, downsample_ratio):
     x1 = (x + grid[:, :, :, :, 0:1]) / grid_w
     y1 = (y + grid[:, :, :, :, 1:2]) / grid_h

-    anchor = paddle.to_tensor(anchor)
-    anchor = paddle.cast(anchor, x.dtype)
+    anchor = paddle.to_tensor(anchor, dtype=x.dtype)
     anchor = anchor.reshape((1, na, 1, 1, 2))
     w1 = paddle.exp(w) * anchor[:, :, :, :, 0:1] / (downsample_ratio * grid_w)
     h1 = paddle.exp(h) * anchor[:, :, :, :, 1:2] / (downsample_ratio * grid_h)
@@ -489,96 +584,6 @@ def batch_distance2bbox(points, distance, max_shapes=None):
     return out_bbox


-def delta2bbox_v2(rois,
-                  deltas,
-                  means=(0.0, 0.0, 0.0, 0.0),
-                  stds=(1.0, 1.0, 1.0, 1.0),
-                  max_shape=None,
-                  wh_ratio_clip=16.0 / 1000.0,
-                  ctr_clip=None):
-    """Transform network output(delta) to bboxes.
-    Based on https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/
-             bbox/coder/delta_xywh_bbox_coder.py
-    Args:
-        rois (Tensor): shape [..., 4], base bboxes, typical examples include
-            anchor and rois
-        deltas (Tensor): shape [..., 4], offset relative to base bboxes
-        means (list[float]): the mean that was used to normalize deltas,
-            must be of size 4
-        stds (list[float]): the std that was used to normalize deltas,
-            must be of size 4
-        max_shape (list[float] or None): height and width of image, will be
-            used to clip bboxes if not None
-        wh_ratio_clip (float): to clip delta wh of decoded bboxes
-        ctr_clip (float or None): whether to clip delta xy of decoded bboxes
-    """
-    if rois.size == 0:
-        return paddle.empty_like(rois)
-    means = paddle.to_tensor(means)
-    stds = paddle.to_tensor(stds)
-    deltas = deltas * stds + means
-
-    dxy = deltas[..., :2]
-    dwh = deltas[..., 2:]
-
-    pxy = (rois[..., :2] + rois[..., 2:]) * 0.5
-    pwh = rois[..., 2:] - rois[..., :2]
-    dxy_wh = pwh * dxy
-
-    max_ratio = np.abs(np.log(wh_ratio_clip))
-    if ctr_clip is not None:
-        dxy_wh = paddle.clip(dxy_wh, max=ctr_clip, min=-ctr_clip)
-        dwh = paddle.clip(dwh, max=max_ratio)
-    else:
-        dwh = dwh.clip(min=-max_ratio, max=max_ratio)
-
-    gxy = pxy + dxy_wh
-    gwh = pwh * dwh.exp()
-    x1y1 = gxy - (gwh * 0.5)
-    x2y2 = gxy + (gwh * 0.5)
-    bboxes = paddle.concat([x1y1, x2y2], axis=-1)
-    if max_shape is not None:
-        bboxes[..., 0::2] = bboxes[..., 0::2].clip(min=0, max=max_shape[1])
-        bboxes[..., 1::2] = bboxes[..., 1::2].clip(min=0, max=max_shape[0])
-    return bboxes
-
-
-def bbox2delta_v2(src_boxes,
-                  tgt_boxes,
-                  means=(0.0, 0.0, 0.0, 0.0),
-                  stds=(1.0, 1.0, 1.0, 1.0)):
-    """Encode bboxes to deltas.
-    Modified from ppdet.modeling.bbox_utils.bbox2delta.
-    Args:
-        src_boxes (Tensor[..., 4]): base bboxes
-        tgt_boxes (Tensor[..., 4]): target bboxes
-        means (list[float]): the mean that will be used to normalize delta
-        stds (list[float]): the std that will be used to normalize delta
-    """
-    if src_boxes.size == 0:
-        return paddle.empty_like(src_boxes)
-    src_w = src_boxes[..., 2] - src_boxes[..., 0]
-    src_h = src_boxes[..., 3] - src_boxes[..., 1]
-    src_ctr_x = src_boxes[..., 0] + 0.5 * src_w
-    src_ctr_y = src_boxes[..., 1] + 0.5 * src_h
-
-    tgt_w = tgt_boxes[..., 2] - tgt_boxes[..., 0]
-    tgt_h = tgt_boxes[..., 3] - tgt_boxes[..., 1]
-    tgt_ctr_x = tgt_boxes[..., 0] + 0.5 * tgt_w
-    tgt_ctr_y = tgt_boxes[..., 1] + 0.5 * tgt_h
-
-    dx = (tgt_ctr_x - src_ctr_x) / src_w
-    dy = (tgt_ctr_y - src_ctr_y) / src_h
-    dw = paddle.log(tgt_w / src_w)
-    dh = paddle.log(tgt_h / src_h)
-
-    deltas = paddle.stack((dx, dy, dw, dh), axis=1)  # [n, 4]
-    means = paddle.to_tensor(means, place=src_boxes.place)
-    stds = paddle.to_tensor(stds, place=src_boxes.place)
-    deltas = (deltas - means) / stds
-    return deltas
-
-
 def iou_similarity(box1, box2, eps=1e-10):
 def iou_similarity(box1, box2, eps=1e-10):
     """Calculate iou of box1 and box2


+ 13 - 0
paddlers/models/ppdet/modeling/heads/__init__.py

@@ -33,6 +33,12 @@ from . import sparsercnn_head
 from . import tood_head
 from . import retina_head
 from . import ppyoloe_head
+from . import fcosr_head
+from . import ppyoloe_r_head
+from . import yolof_head
+from . import ppyoloe_contrast_head
+from . import centertrack_head
+from . import sparse_roi_head
 
 
 from .bbox_head import *
 from .mask_head import *
@@ -55,3 +61,10 @@ from .sparsercnn_head import *
 from .tood_head import *
 from .retina_head import *
 from .ppyoloe_head import *
+from .fcosr_head import *
+from .ppyoloe_r_head import *
+from .yolof_head import *
+from .ppyoloe_contrast_head import *
+from .centertrack_head import *
+from .sparse_roi_head import *
+from .petr_head import *

+ 51 - 12
paddlers/models/ppdet/modeling/heads/bbox_head.py

@@ -160,8 +160,8 @@ class XConvNormHead(nn.Layer):
 
 
 @register
 class BBoxHead(nn.Layer):
-    __shared__ = ['num_classes']
-    __inject__ = ['bbox_assigner', 'bbox_loss']
+    __shared__ = ['num_classes', 'use_cot']
+    __inject__ = ['bbox_assigner', 'bbox_loss', 'loss_cot']
     """
     """
     RCNN bbox head
     RCNN bbox head
 
 
@@ -173,7 +173,10 @@ class BBoxHead(nn.Layer):
             box.
         with_pool (bool): Whether to use pooling for the RoI feature.
         num_classes (int): The number of classes
-        bbox_weight (List[float]): The weight to get the decode box 
+        bbox_weight (List[float]): The weight to get the decode box
+        cot_classes (int): The number of base classes
+        loss_cot (object): The module of Label-cotuning
+        use_cot (bool): whether to use Label-cotuning
     """
     """
 
 
     def __init__(self,
     def __init__(self,
@@ -185,7 +188,10 @@ class BBoxHead(nn.Layer):
                  num_classes=80,
                  bbox_weight=[10., 10., 5., 5.],
                  bbox_loss=None,
-                 loss_normalize_pos=False):
+                 loss_normalize_pos=False,
+                 cot_classes=None,
+                 loss_cot='COTLoss',
+                 use_cot=False):
         super(BBoxHead, self).__init__()
         self.head = head
         self.roi_extractor = roi_extractor
@@ -199,11 +205,29 @@ class BBoxHead(nn.Layer):
         self.bbox_loss = bbox_loss
         self.loss_normalize_pos = loss_normalize_pos

-        self.bbox_score = nn.Linear(
-            in_channel,
-            self.num_classes + 1,
-            weight_attr=paddle.ParamAttr(initializer=Normal(
-                mean=0.0, std=0.01)))
+        self.loss_cot = loss_cot
+        self.cot_relation = None
+        self.cot_classes = cot_classes
+        self.use_cot = use_cot
+        if use_cot:
+            self.cot_bbox_score = nn.Linear(
+                in_channel,
+                self.num_classes + 1,
+                weight_attr=paddle.ParamAttr(initializer=Normal(
+                    mean=0.0, std=0.01)))
+            
+            self.bbox_score = nn.Linear(
+                in_channel,
+                self.cot_classes + 1,
+                weight_attr=paddle.ParamAttr(initializer=Normal(
+                    mean=0.0, std=0.01)))
+            self.cot_bbox_score.skip_quant = True
+        else:
+            self.bbox_score = nn.Linear(
+                in_channel,
+                self.num_classes + 1,
+                weight_attr=paddle.ParamAttr(initializer=Normal(
+                    mean=0.0, std=0.01)))
         self.bbox_score.skip_quant = True

         self.bbox_delta = nn.Linear(
@@ -215,6 +239,9 @@ class BBoxHead(nn.Layer):
         self.assigned_label = None
         self.assigned_rois = None

+    def init_cot_head(self, relationship):
+        self.cot_relation = relationship
+        
     @classmethod
     def from_config(cls, cfg, input_shape):
         roi_pooler = cfg['roi_extractor']
@@ -229,7 +256,7 @@ class BBoxHead(nn.Layer):
             'in_channel': head.out_shape[0].channels
         }

-    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
+    def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None, cot=False):
         """
         """
         body_feats (list[Tensor]): Feature maps from backbone
         body_feats (list[Tensor]): Feature maps from backbone
         rois (list[Tensor]): RoIs generated from RPN module
         rois (list[Tensor]): RoIs generated from RPN module
@@ -248,7 +275,11 @@ class BBoxHead(nn.Layer):
             feat = paddle.squeeze(feat, axis=[2, 3])
         else:
             feat = bbox_feat
-        scores = self.bbox_score(feat)
+        if self.use_cot:
+            scores = self.cot_bbox_score(feat)
+            cot_scores = self.bbox_score(feat)
+        else:
+            scores = self.bbox_score(feat)
         deltas = self.bbox_delta(feat)

         if self.training:
@@ -259,11 +290,19 @@ class BBoxHead(nn.Layer):
                 rois,
                 self.bbox_weight,
                 loss_normalize_pos=self.loss_normalize_pos)
+            
+            if self.cot_relation is not None:
+                loss_cot = self.loss_cot(cot_scores, targets, self.cot_relation)
+                loss.update(loss_cot)
             return loss, bbox_feat
         else:
-            pred = self.get_prediction(scores, deltas)
+            if cot:
+                pred = self.get_prediction(cot_scores, deltas)
+            else:
+                pred = self.get_prediction(scores, deltas)
             return pred, self.head

+
     def get_loss(self,
                  scores,
                  deltas,
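The Label-cotuning changes give BBoxHead a second classification branch: cot_bbox_score predicts the regular num_classes + 1 scores, while bbox_score now covers the cot_classes + 1 base classes and is supervised by loss_cot once a class relationship is registered through init_cot_head(). A stripped-down sketch of that two-branch pattern, assuming only that paddle is installed (channel and class counts are placeholders; the real head also produces deltas, the RCNN losses, and predictions as shown above):

import paddle
import paddle.nn as nn

class TwoBranchScore(nn.Layer):
    """Illustration of the use_cot scoring layout, not the actual BBoxHead."""

    def __init__(self, in_channel=1024, num_classes=80, cot_classes=9):
        super().__init__()
        # regular scores over the target (novel) classes
        self.cot_bbox_score = nn.Linear(in_channel, num_classes + 1)
        # scores over the base classes, consumed by the co-tuning loss
        self.bbox_score = nn.Linear(in_channel, cot_classes + 1)

    def forward(self, feat, cot=False):
        scores = self.cot_bbox_score(feat)      # [N, num_classes + 1]
        cot_scores = self.bbox_score(feat)      # [N, cot_classes + 1]
        return cot_scores if cot else scores

feat = paddle.randn([4, 1024])
print(TwoBranchScore()(feat).shape)             # [4, 81]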

+ 42 - 40
paddlers/models/ppdet/modeling/heads/centernet_head.py

@@ -61,13 +61,12 @@ class CenterNetHead(nn.Layer):
         in_channels (int): the channel number of input to CenterNetHead.
         num_classes (int): the number of classes, 80 (COCO dataset) by default.
         head_planes (int): the channel number in all head, 256 by default.
-        heatmap_weight (float): the weight of heatmap loss, 1 by default.
+        prior_bias (float): prior bias in heatmap head, -2.19 by default, -4.6 in CenterTrack
         regress_ltrb (bool): whether to regress left/top/right/bottom or
         regress_ltrb (bool): whether to regress left/top/right/bottom or
-            width/height for a box, true by default
-        size_weight (float): the weight of box size loss, 0.1 by default.
-        size_loss (): the type of size regression loss, 'L1 loss' by default.
-        offset_weight (float): the weight of center offset loss, 1 by default.
-        iou_weight (float): the weight of iou head loss, 0 by default.
+            width/height for a box, True by default.
+        size_loss (str): the type of size regression loss, 'L1' by default, can be 'giou'.
+        loss_weight (dict): the weight of each loss.
+        add_iou (bool): whether to add iou branch, False by default.
     """
     """
 
 
     __shared__ = ['num_classes']
     __shared__ = ['num_classes']
@@ -76,20 +75,20 @@ class CenterNetHead(nn.Layer):
                  in_channels,
                  num_classes=80,
                  head_planes=256,
-                 heatmap_weight=1,
+                 prior_bias=-2.19,
                  regress_ltrb=True,
-                 size_weight=0.1,
                  size_loss='L1',
-                 offset_weight=1,
-                 iou_weight=0):
+                 loss_weight={
+                     'heatmap': 1.0,
+                     'size': 0.1,
+                     'offset': 1.0,
+                     'iou': 0.0,
+                 },
+                 add_iou=False):
         super(CenterNetHead, self).__init__()
         self.regress_ltrb = regress_ltrb
-        self.weights = {
-            'heatmap': heatmap_weight,
-            'size': size_weight,
-            'offset': offset_weight,
-            'iou': iou_weight
-        }
+        self.loss_weight = loss_weight
+        self.add_iou = add_iou
 
 
         # heatmap head
         self.heatmap = nn.Sequential(
@@ -104,7 +103,7 @@ class CenterNetHead(nn.Layer):
                 padding=0,
                 bias=True))
         with paddle.no_grad():
-            self.heatmap[2].conv.bias[:] = -2.19
+            self.heatmap[2].conv.bias[:] = prior_bias
 
 
         # size(ltrb or wh) head
         self.size = nn.Sequential(
@@ -129,7 +128,7 @@ class CenterNetHead(nn.Layer):
                 head_planes, 2, kernel_size=1, stride=1, padding=0, bias=True))

         # iou head (optinal)
-        if iou_weight > 0:
+        if self.add_iou and 'iou' in self.loss_weight:
             self.iou = nn.Sequential(
                 ConvLayer(
                     in_channels,
@@ -153,34 +152,34 @@ class CenterNetHead(nn.Layer):
         return {'in_channels': input_shape.channels}

     def forward(self, feat, inputs):
-        heatmap = self.heatmap(feat)
+        heatmap = F.sigmoid(self.heatmap(feat))
         size = self.size(feat)
         offset = self.offset(feat)
-        iou = self.iou(feat) if hasattr(self, 'iou_weight') else None
+        head_outs = {'heatmap': heatmap, 'size': size, 'offset': offset}
+        if self.add_iou and 'iou' in self.loss_weight:
+            iou = self.iou(feat)
+            head_outs.update({'iou': iou})
 
 
         if self.training:
-            loss = self.get_loss(
-                inputs, self.weights, heatmap, size, offset, iou=iou)
-            return loss
+            losses = self.get_loss(inputs, self.loss_weight, head_outs)
+            return losses
         else:
-            heatmap = F.sigmoid(heatmap)
-            head_outs = {'heatmap': heatmap, 'size': size, 'offset': offset}
-            if iou is not None:
-                head_outs.update({'iou': iou})
             return head_outs

-    def get_loss(self, inputs, weights, heatmap, size, offset, iou=None):
-        # heatmap head loss: CTFocalLoss
+    def get_loss(self, inputs, weights, head_outs):
+        # 1.heatmap(hm) head loss: CTFocalLoss
+        heatmap = head_outs['heatmap']
         heatmap_target = inputs['heatmap']
-        heatmap = paddle.clip(F.sigmoid(heatmap), 1e-4, 1 - 1e-4)
+        heatmap = paddle.clip(heatmap, 1e-4, 1 - 1e-4)
         ctfocal_loss = CTFocalLoss()
         heatmap_loss = ctfocal_loss(heatmap, heatmap_target)

-        # size head loss: L1 loss or GIoU loss
+        # 2.size(wh) head loss: L1 loss or GIoU loss
+        size = head_outs['size']
         index = inputs['index']
         mask = inputs['index_mask']
         size = paddle.transpose(size, perm=[0, 2, 3, 1])
-        size_n, size_h, size_w, size_c = size.shape
+        size_n, _, _, size_c = size.shape
         size = paddle.reshape(size, shape=[size_n, -1, size_c])
         index = paddle.unsqueeze(index, 2)
         batch_inds = list()
@@ -208,7 +207,8 @@ class CenterNetHead(nn.Layer):
                 else:
                     # inputs['size'] is ltrb, but regress as wh
                     # shape: [bs, max_per_img, 4]
-                    size_target = inputs['size'][:, :, 0:2] + inputs['size'][:, :, 2:]
+                    size_target = inputs['size'][:, :, 0:2] + inputs[
+                        'size'][:, :, 2:]
 
 
             size_target.stop_gradient = True
             size_loss = F.l1_loss(
@@ -232,10 +232,11 @@ class CenterNetHead(nn.Layer):
                 loc_reweight=None)
             size_loss = size_loss / (pos_num + 1e-4)

-        # offset head loss: L1 loss
+        # 3.offset(reg) head loss: L1 loss
+        offset = head_outs['offset']
         offset_target = inputs['offset']
         offset = paddle.transpose(offset, perm=[0, 2, 3, 1])
-        offset_n, offset_h, offset_w, offset_c = offset.shape
+        offset_n, _, _, offset_c = offset.shape
         offset = paddle.reshape(offset, shape=[offset_n, -1, offset_c])
         pos_offset = paddle.gather_nd(offset, index=index)
         offset_mask = paddle.expand_as(mask, pos_offset)
@@ -249,10 +250,11 @@ class CenterNetHead(nn.Layer):
             reduction='sum')
         offset_loss = offset_loss / (pos_num + 1e-4)

-        # iou head loss: GIoU loss
-        if iou is not None:
+        # 4.iou head loss: GIoU loss (optional)
+        if self.add_iou and 'iou' in self.loss_weight:
+            iou = head_outs['iou']
             iou = paddle.transpose(iou, perm=[0, 2, 3, 1])
-            iou_n, iou_h, iou_w, iou_c = iou.shape
+            iou_n, _, _, iou_c = iou.shape
             iou = paddle.reshape(iou, shape=[iou_n, -1, iou_c])
             pos_iou = paddle.gather_nd(iou, index=index)
             iou_mask = paddle.expand_as(mask, pos_iou)
@@ -284,8 +286,8 @@ class CenterNetHead(nn.Layer):
         det_loss = weights['heatmap'] * heatmap_loss + weights[
             'size'] * size_loss + weights['offset'] * offset_loss

-        if iou is not None:
+        if self.add_iou and 'iou' in self.loss_weight:
             losses.update({'iou_loss': iou_loss})
-            det_loss = det_loss + weights['iou'] * iou_loss
+            det_loss += weights['iou'] * iou_loss
         losses.update({'det_loss': det_loss})
         return losses
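With this refactor, the per-loss weights collapse into a single loss_weight dict and the IoU branch must be enabled explicitly via add_iou. A hypothetical construction under the new interface, assuming paddlers (and paddle) are installed; the in_channels value is illustrative and would normally come from the neck's output shape:

from paddlers.models.ppdet.modeling.heads.centernet_head import CenterNetHead

head = CenterNetHead(
    in_channels=64,                # illustrative; taken from the neck output in practice
    num_classes=80,
    head_planes=256,
    prior_bias=-4.6,               # CenterTrack-style heatmap prior (default -2.19)
    regress_ltrb=True,
    size_loss='L1',                # or 'giou'
    loss_weight={'heatmap': 1.0, 'size': 0.1, 'offset': 1.0, 'iou': 1.0},
    add_iou=True)                  # builds the optional IoU branch

Note that the IoU loss is only computed when add_iou is True and an 'iou' key is present in loss_weight, mirroring the checks in forward() and get_loss() above.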

Some files were not shown because the number of changed files is too large