il y a 4 ans · 16c85bf3c2
--- a/paddlers/__init__.py
+++ b/paddlers/__init__.py
@@ -1 +1,5 @@
 
				-from . import datasets, transforms, utils, tools
			
 
				+from . import tasks, datasets, transforms, utils, tools, models
			
 
				+
			
 
# TODO: populate this information at installation time instead of hard-coding it.
# Package-level environment description; presumably the device kind and the
# number of devices -- confirm against the code that reads `env_info`.
env_info = {'place': 'gpu', 'num': 1}
# NOTE(review): the version is stored as a float rather than a string --
# confirm that no consumer expects a version string here.
__version__ = 0.1
			
--- a/paddlers/datasets/__init__.py
+++ b/paddlers/datasets/__init__.py
@@ -0,0 +1 @@
 
				+from .voc import VOCDetection
			
--- a/paddlers/datasets/voc.py
+++ b/paddlers/datasets/voc.py
@@ -0,0 +1,445 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+import copy
			
 
				+import os
			
 
				+import os.path as osp
			
 
				+import random
			
 
				+import re
			
 
				+import numpy as np
			
 
				+from collections import OrderedDict
			
 
				+import xml.etree.ElementTree as ET
			
 
				+from paddle.io import Dataset
			
 
				+from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
			
 
				+from paddlers.transforms import Decode, MixupImage
			
 
				+from paddlers.tools import YOLOAnchorCluster
			
 
				+
			
 
				+
			
 
				+class VOCDetection(Dataset):
			
 
				+    """读取PascalVOC格式的检测数据集，并对样本进行相应的处理。
			
 
				+
			
 
				+    Args:
			
 
				+        data_dir (str): 数据集所在的目录路径。
			
 
				+        file_list (str): 描述数据集图片文件和对应标注文件的文件路径（文本内每行路径为相对data_dir的相对路）。
			
 
				+        label_list (str): 描述数据集包含的类别信息文件路径。
			
 
				+        transforms (paddlers.det.transforms): 数据集中每个样本的预处理/增强算子。
			
 
				+        num_workers (int|str): 数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时，根据
			
 
				+            系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8，则`num_workers`为8，否则为CPU核数的
			
 
				+            一半。
			
 
				+        shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。
			
 
				+        allow_empty (bool): 是否加载负样本。默认为False。
			
 
				+        empty_ratio (float): 用于指定负样本占总样本数的比例。如果小于0或大于等于1，则保留全部的负样本。默认为1。
			
 
				+    """
			
 
				+
			
 
    def __init__(self,
                 data_dir,
                 file_list,
                 label_list,
                 transforms=None,
                 num_workers='auto',
                 shuffle=False,
                 allow_empty=False,
                 empty_ratio=1.):
        """Parse the label list and the file list and build the sample records.

        Every kept sample becomes one dict in `self.file_list`. Samples with
        at least one valid box are also registered in a COCO-style ground
        truth index stored in `self.coco_gt`.

        Args:
            data_dir (str): Directory that the paths in `file_list` are
                relative to.
            file_list (str): Path of a text file holding one
                "<image path> <xml path>" pair per line, separated by
                whitespace. Blank lines are skipped.
            label_list (str): Path of a text file holding one category name
                per line; category ids follow the line order, starting at 0.
            transforms: Transform pipeline. It is deep-copied and, when not
                None, its `transforms` attribute is scanned for a
                `MixupImage` operator. Defaults to None.
            num_workers (int|str): Forwarded to `get_num_workers`. Defaults
                to 'auto'.
            shuffle (bool): Stored on the instance. Defaults to False.
            allow_empty (bool): Whether samples without any valid box are
                added to `self.file_list`. Defaults to False.
            empty_ratio (float): Ratio used by `_sample_empty` when negative
                samples are added. Defaults to 1.

        Raises:
            Exception: If a line of `file_list` contains more than two
                whitespace-separated fields.
        """
        # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
        # or matplotlib.backends is imported for the first time, and
        # pycocotools imports matplotlib.
        import matplotlib
        matplotlib.use('Agg')
        from pycocotools.coco import COCO
        super(VOCDetection, self).__init__()
        self.data_dir = data_dir
        self.data_fields = None
        self.transforms = copy.deepcopy(transforms)
        self.num_max_boxes = 50

        self.use_mix = False
        if self.transforms is not None:
            for op in self.transforms.transforms:
                if isinstance(op, MixupImage):
                    self.mixup_op = copy.deepcopy(op)
                    self.use_mix = True
                    # A mixed sample carries the boxes of two images.
                    self.num_max_boxes *= 2
                    break

        self.batch_transforms = None
        self.num_workers = get_num_workers(num_workers)
        self.shuffle = shuffle
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio
        self.file_list = list()
        neg_file_list = list()
        self.labels = list()

        annotations = dict()
        annotations['images'] = list()
        annotations['categories'] = list()
        annotations['annotations'] = list()

        cname2cid = OrderedDict()
        label_id = 0
        with open(label_list, 'r', encoding=get_encoding(label_list)) as f:
            for line in f.readlines():
                cname2cid[line.strip()] = label_id
                label_id += 1
                self.labels.append(line.strip())
        logging.info("Starting to read file list from dataset...")
        for k, v in cname2cid.items():
            # COCO category ids are 1-based, the internal class ids 0-based.
            annotations['categories'].append({
                'supercategory': 'component',
                'id': v + 1,
                'name': k
            })

        # Tags are matched case-insensitively: each pattern recovers the tag
        # exactly as it is spelled in the XML so that it can be handed to
        # find()/findall(). The patterns are compiled once, not per record.
        size_re = re.compile('<size>', re.IGNORECASE)
        width_re = re.compile('<width>', re.IGNORECASE)
        height_re = re.compile('<height>', re.IGNORECASE)
        object_re = re.compile('<object>', re.IGNORECASE)
        name_re = re.compile('<name>', re.IGNORECASE)
        difficult_re = re.compile('<difficult>', re.IGNORECASE)
        bndbox_re = re.compile('<bndbox>', re.IGNORECASE)
        corner_res = [
            re.compile(tag, re.IGNORECASE)
            for tag in ('<xmin>', '<ymin>', '<xmax>', '<ymax>')
        ]

        def xml_text(element):
            # Serialized form of `element` that the tag patterns are run on.
            return str(ET.tostringlist(element))

        ct = 0
        ann_ct = 0
        with open(file_list, 'r', encoding=get_encoding(file_list)) as f:
            for line in f:
                fields = line.strip().split()
                if not fields:
                    # A blank line would otherwise fail on the unpacking below.
                    continue
                if len(fields) > 2:
                    raise Exception("A space is defined as the separator, "
                                    "but it exists in image or label name {}."
                                    .format(line))
                img_file, xml_file = [
                    osp.join(data_dir, x) for x in fields[:2]
                ]
                img_file = path_normalization(img_file)
                xml_file = path_normalization(xml_file)
                if not is_pic(img_file):
                    continue
                if not osp.isfile(xml_file):
                    continue
                if not osp.exists(img_file):
                    logging.warning('The image file {} does not exist!'.format(
                        img_file))
                    continue
                # NOTE(review): xml_file already passed osp.isfile() above, so
                # this warning is not expected to trigger; kept as in the
                # original.
                if not osp.exists(xml_file):
                    logging.warning('The annotation file {} does not exist!'.
                                    format(xml_file))
                    continue
                tree = ET.parse(xml_file)
                id_element = tree.find('id')
                if id_element is None:
                    im_id = np.asarray([ct])
                else:
                    # An explicit <id> also resets the running counter.
                    ct = int(id_element.text)
                    im_id = np.asarray([ct])

                root_text = xml_text(tree.getroot())
                size_tags = size_re.findall(root_text)
                if len(size_tags) > 0:
                    size_element = tree.find(size_tags[0][1:-1])
                    size_text = xml_text(size_element)
                    width_tag = width_re.findall(size_text)[0][1:-1]
                    im_w = float(size_element.find(width_tag).text)
                    height_tag = height_re.findall(size_text)[0][1:-1]
                    im_h = float(size_element.find(height_tag).text)
                else:
                    im_w = 0
                    im_h = 0

                obj_tags = object_re.findall(root_text)
                if len(obj_tags) > 0:
                    objs = tree.findall(obj_tags[0][1:-1])
                else:
                    objs = list()

                # The arrays are sized for all objects and trimmed to the `i`
                # valid ones after the loop.
                num_bbox, i = len(objs), 0
                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                for obj in objs:
                    obj_text = xml_text(obj)
                    name_tag = name_re.findall(obj_text)[0][1:-1]
                    cname = obj.find(name_tag).text.strip()

                    diff_tags = difficult_re.findall(obj_text)
                    if len(diff_tags) == 0:
                        _difficult = 0
                    else:
                        try:
                            _difficult = int(obj.find(diff_tags[0][1:-1]).text)
                        except (AttributeError, TypeError, ValueError):
                            # Missing, empty or non-numeric <difficult>.
                            _difficult = 0

                    box_tags = bndbox_re.findall(obj_text)
                    if len(box_tags) == 0:
                        logging.warning(
                            "There's no field '<bndbox>' in one of object, "
                            "so this object will be ignored. xml file: {}".
                            format(xml_file))
                        continue
                    box_element = obj.find(box_tags[0][1:-1])
                    box_text = xml_text(box_element)
                    x1, y1, x2, y2 = [
                        float(
                            box_element.find(corner_re.findall(box_text)[0][
                                1:-1]).text) for corner_re in corner_res
                    ]
                    x1 = max(0, x1)
                    y1 = max(0, y1)
                    # Clip to the image only when the XML declares a size.
                    if im_w > 0.5 and im_h > 0.5:
                        x2 = min(im_w - 1, x2)
                        y2 = min(im_h - 1, y2)

                    if not (x2 >= x1 and y2 >= y1):
                        logging.warning(
                            "Bounding box for object {} does not satisfy xmin {} <= xmax {} and ymin {} <= ymax {}, "
                            "so this object is skipped. xml file: {}".format(
                                i, x1, x2, y1, y2, xml_file))
                        continue

                    gt_bbox[i, :] = [x1, y1, x2, y2]
                    gt_class[i, 0] = cname2cid[cname]
                    gt_score[i, 0] = 1.
                    is_crowd[i, 0] = 0
                    difficult[i, 0] = _difficult
                    i += 1
                    annotations['annotations'].append({
                        'iscrowd': 0,
                        'image_id': int(im_id[0]),
                        'bbox': [x1, y1, x2 - x1, y2 - y1],
                        'area': float((x2 - x1) * (y2 - y1)),
                        'category_id': cname2cid[cname] + 1,
                        'id': ann_ct,
                        'difficult': _difficult
                    })
                    ann_ct += 1

                gt_bbox = gt_bbox[:i, :]
                gt_class = gt_class[:i, :]
                gt_score = gt_score[:i, :]
                is_crowd = is_crowd[:i, :]
                difficult = difficult[:i, :]

                im_info = {
                    'im_id': im_id,
                    'image_shape': np.array(
                        [im_h, im_w], dtype=np.int32)
                }
                label_info = {
                    'is_crowd': is_crowd,
                    'gt_class': gt_class,
                    'gt_bbox': gt_bbox,
                    'gt_score': gt_score,
                    'difficult': difficult
                }

                if gt_bbox.size > 0:
                    self.file_list.append({
                        'image': img_file,
                        **im_info,
                        **label_info
                    })
                    annotations['images'].append({
                        'height': im_h,
                        'width': im_w,
                        'id': int(im_id[0]),
                        'file_name': osp.split(img_file)[1]
                    })
                else:
                    # No valid box: candidate negative sample.
                    neg_file_list.append({
                        'image': img_file,
                        **im_info,
                        **label_info
                    })
                ct += 1

                if self.use_mix:
                    self.num_max_boxes = max(self.num_max_boxes, 2 * len(objs))
                else:
                    self.num_max_boxes = max(self.num_max_boxes, len(objs))

        if not ct:
            # The path is interpolated into the message (the original kept the
            # `%` operator inside the string literal).
            logging.error(
                "No voc record found in %s" % (file_list), exit=True)
        self.pos_num = len(self.file_list)
        if self.allow_empty and neg_file_list:
            self.file_list += self._sample_empty(neg_file_list)
        logging.info(
            "{} samples in file {}, including {} positive samples and {} negative samples.".
            format(
                len(self.file_list), file_list, self.pos_num,
                len(self.file_list) - self.pos_num))
        self.num_samples = len(self.file_list)
        self.coco_gt = COCO()
        self.coco_gt.dataset = annotations
        self.coco_gt.createIndex()

        self._epoch = 0
			
 
				+
			
 
				+    def __getitem__(self, idx):
			
 
				+        sample = copy.deepcopy(self.file_list[idx])
			
 
				+        if self.data_fields is not None:
			
 
				+            sample = {k: sample[k] for k in self.data_fields}
			
 
				+        if self.use_mix and (self.mixup_op.mixup_epoch == -1 or
			
 
				+                             self._epoch < self.mixup_op.mixup_epoch):
			
 
				+            if self.num_samples > 1:
			
 
				+                mix_idx = random.randint(1, self.num_samples - 1)
			
 
				+                mix_pos = (mix_idx + idx) % self.num_samples
			
 
				+            else:
			
 
				+                mix_pos = 0
			
 
				+            sample_mix = copy.deepcopy(self.file_list[mix_pos])
			
 
				+            if self.data_fields is not None:
			
 
				+                sample_mix = {k: sample_mix[k] for k in self.data_fields}
			
 
				+            sample = self.mixup_op(sample=[
			
 
				+                Decode(to_rgb=False)(sample), Decode(to_rgb=False)(sample_mix)
			
 
				+            ])
			
 
				+        sample = self.transforms(sample)
			
 
				+        return sample
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return self.num_samples
			
 
				+
			
 
				+    def set_epoch(self, epoch_id):
			
 
				+        self._epoch = epoch_id
			
 
				+
			
 
    def cluster_yolo_anchor(self,
                            num_anchors,
                            image_size,
                            cache=True,
                            cache_path=None,
                            iters=300,
                            gen_iters=1000,
                            thresh=.25):
        """
        Cluster YOLO anchors for this dataset with `YOLOAnchorCluster`.

        Reference:
            https://github.com/ultralytics/yolov5/blob/master/utils/autoanchor.py

        Args:
            num_anchors (int): Number of clusters.
            image_size (list or int): [h, w]; an int means that the image height and width are the same.
            cache (bool, optional): Whether to use the cache. Defaults to True.
            cache_path (str or None, optional): Cache directory path. If None, `data_dir` of the dataset is used.
            iters (int, optional): Iterations of the k-means algorithm. Defaults to 300.
            gen_iters (int, optional): Iterations of the genetic algorithm. Defaults to 1000.
            thresh (float, optional): Anchor scale threshold. Defaults to 0.25.

        Returns:
            The value returned by calling the `YOLOAnchorCluster` instance.
        """
        cluster_kwargs = dict(
            num_anchors=num_anchors,
            dataset=self,
            image_size=image_size,
            cache=cache,
            cache_path=self.data_dir if cache_path is None else cache_path,
            iters=iters,
            gen_iters=gen_iters,
            thresh=thresh)
        return YOLOAnchorCluster(**cluster_kwargs)()
			
 
				+
			
 
    def add_negative_samples(self, image_dir, empty_ratio=1):
        """Add background images (images without objects) to the dataset.

        Every picture file found directly in `image_dir` becomes a candidate
        negative sample with empty annotations; `_sample_empty` then decides
        how many candidates are appended to `self.file_list`. Files that
        OpenCV cannot read are skipped with a warning.

        Args:
            image_dir (str): Directory that contains the background images.
            empty_ratio (float or None): Ratio of negative samples to the total number of samples. If None, the
                `empty_ratio` currently stored on the dataset is kept; otherwise it replaces that value. If it is
                less than 0 or greater than or equal to 1, all negative samples are kept. Defaults to 1.

        Raises:
            Exception: If `image_dir` is not a directory.
        """
        import cv2
        if not osp.isdir(image_dir):
            raise Exception("{} is not a valid image directory.".format(
                image_dir))
        if empty_ratio is not None:
            self.empty_ratio = empty_ratio
        image_list = os.listdir(image_dir)
        # New ids continue after the largest id already in use; `default`
        # covers a ground truth index without any image.
        max_img_id = max(
            len(self.file_list) - 1,
            max(self.coco_gt.getImgIds(), default=-1))
        # Mirror the optional 'gt_poly' field of the existing records; an
        # empty dataset has no record to inspect.
        has_poly = bool(self.file_list) and 'gt_poly' in self.file_list[0]
        neg_file_list = list()
        for image in image_list:
            if not is_pic(image):
                continue
            im_fname = osp.join(image_dir, image)
            img_data = cv2.imread(im_fname, cv2.IMREAD_UNCHANGED)
            if img_data is None:
                logging.warning(
                    'The image file {} cannot be read, so it is skipped.'.
                    format(im_fname))
                continue
            # Only height and width are needed; single-channel images have a
            # two-dimensional shape, so a three-way unpacking would fail.
            im_h, im_w = img_data.shape[:2]
            max_img_id += 1

            im_info = {
                'im_id': np.asarray([max_img_id]),
                'image_shape': np.array(
                    [im_h, im_w], dtype=np.int32)
            }
            label_info = {
                'is_crowd': np.zeros((0, 1), dtype=np.int32),
                'gt_class': np.zeros((0, 1), dtype=np.int32),
                'gt_bbox': np.zeros((0, 4), dtype=np.float32),
                'gt_score': np.zeros((0, 1), dtype=np.float32),
                'difficult': np.zeros((0, 1), dtype=np.int32)
            }
            if has_poly:
                label_info['gt_poly'] = []

            neg_file_list.append({
                'image': im_fname,
                **im_info,
                **label_info
            })
        if neg_file_list:
            self.allow_empty = True
            self.file_list += self._sample_empty(neg_file_list)
        logging.info(
            "{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
            format(
                len(self.file_list) - self.num_samples, self.pos_num,
                len(self.file_list) - self.pos_num))
        self.num_samples = len(self.file_list)
			
 
				+
			
 
				+    def _sample_empty(self, neg_file_list):
			
 
				+        if 0. <= self.empty_ratio < 1.:
			
 
				+            import random
			
 
				+            total_num = len(self.file_list)
			
 
				+            neg_num = total_num - self.pos_num
			
 
				+            sample_num = min((total_num * self.empty_ratio - neg_num) //
			
 
				+                             (1 - self.empty_ratio), len(neg_file_list))
			
 
				+            return random.sample(neg_file_list, sample_num)
			
 
				+        else:
			
 
				+            return neg_file_list
			
--- a/paddlers/models/__init__.py
+++ b/paddlers/models/__init__.py
@@ -0,0 +1,15 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import ppcd, ppcls, ppdet, ppseg
			
--- a/paddlers/models/ppcd/__init__.py
+++ b/paddlers/models/ppcd/__init__.py
--- a/paddlers/third_party/ppcd/__init__.py
+++ b/paddlers/third_party/ppcd/__init__.py
--- a/paddlers/models/ppdet/__init__.py
+++ b/paddlers/models/ppdet/__init__.py
@@ -0,0 +1,16 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
			
 
				+               utils, slim)
			
--- a/paddlers/models/ppdet/core/__init__.py
+++ b/paddlers/models/ppdet/core/__init__.py
@@ -0,0 +1,15 @@
 
				+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import config
			
--- a/paddlers/models/ppdet/core/config/__init__.py
+++ b/paddlers/models/ppdet/core/config/__init__.py
@@ -0,0 +1,13 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
--- a/paddlers/models/ppdet/core/config/schema.py
+++ b/paddlers/models/ppdet/core/config/schema.py
@@ -0,0 +1,248 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import print_function
			
 
				+from __future__ import division
			
 
				+
			
 
				+import inspect
			
 
				+import importlib
			
 
				+import re
			
 
				+
			
 
				+try:
			
 
				+    from docstring_parser import parse as doc_parse
			
 
				+except Exception:
			
 
				+
			
 
				+    def doc_parse(*args):
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+try:
			
 
				+    from typeguard import check_type
			
 
				+except Exception:
			
 
				+
			
 
				+    def check_type(*args):
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+__all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema']
			
 
				+
			
 
				+
			
 
				+class SchemaValue(object):
			
 
				+    def __init__(self, name, doc='', type=None):
			
 
				+        super(SchemaValue, self).__init__()
			
 
				+        self.name = name
			
 
				+        self.doc = doc
			
 
				+        self.type = type
			
 
				+
			
 
				+    def set_default(self, value):
			
 
				+        self.default = value
			
 
				+
			
 
				+    def has_default(self):
			
 
				+        return hasattr(self, 'default')
			
 
				+
			
 
				+
			
 
				+class SchemaDict(dict):
			
 
				+    def __init__(self, **kwargs):
			
 
				+        super(SchemaDict, self).__init__()
			
 
				+        self.schema = {}
			
 
				+        self.strict = False
			
 
				+        self.doc = ""
			
 
				+        self.update(kwargs)
			
 
				+
			
 
				+    def __setitem__(self, key, value):
			
 
				+        # XXX also update regular dict to SchemaDict??
			
 
				+        if isinstance(value, dict) and key in self and isinstance(self[key],
			
 
				+                                                                  SchemaDict):
			
 
				+            self[key].update(value)
			
 
				+        else:
			
 
				+            super(SchemaDict, self).__setitem__(key, value)
			
 
				+
			
 
				+    def __missing__(self, key):
			
 
				+        if self.has_default(key):
			
 
				+            return self.schema[key].default
			
 
				+        elif key in self.schema:
			
 
				+            return self.schema[key]
			
 
				+        else:
			
 
				+            raise KeyError(key)
			
 
				+
			
 
				+    def copy(self):
			
 
				+        newone = SchemaDict()
			
 
				+        newone.__dict__.update(self.__dict__)
			
 
				+        newone.update(self)
			
 
				+        return newone
			
 
				+
			
 
				+    def set_schema(self, key, value):
			
 
				+        assert isinstance(value, SchemaValue)
			
 
				+        self.schema[key] = value
			
 
				+
			
 
				+    def set_strict(self, strict):
			
 
				+        self.strict = strict
			
 
				+
			
 
				+    def has_default(self, key):
			
 
				+        return key in self.schema and self.schema[key].has_default()
			
 
				+
			
 
				+    def is_default(self, key):
			
 
				+        if not self.has_default(key):
			
 
				+            return False
			
 
				+        if hasattr(self[key], '__dict__'):
			
 
				+            return True
			
 
				+        else:
			
 
				+            return key not in self or self[key] == self.schema[key].default
			
 
				+
			
 
				+    def find_default_keys(self):
			
 
				+        return [
			
 
				+            k for k in list(self.keys()) + list(self.schema.keys())
			
 
				+            if self.is_default(k)
			
 
				+        ]
			
 
				+
			
 
				+    def mandatory(self):
			
 
				+        return any([k for k in self.schema.keys() if not self.has_default(k)])
			
 
				+
			
 
				+    def find_missing_keys(self):
			
 
				+        missing = [
			
 
				+            k for k in self.schema.keys()
			
 
				+            if k not in self and not self.has_default(k)
			
 
				+        ]
			
 
				+        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
			
 
				+        return missing + placeholders
			
 
				+
			
 
				+    def find_extra_keys(self):
			
 
				+        return list(set(self.keys()) - set(self.schema.keys()))
			
 
				+
			
 
				+    def find_mismatch_keys(self):
			
 
				+        mismatch_keys = []
			
 
				+        for arg in self.schema.values():
			
 
				+            if arg.type is not None:
			
 
				+                try:
			
 
				+                    check_type("{}.{}".format(self.name, arg.name),
			
 
				+                               self[arg.name], arg.type)
			
 
				+                except Exception:
			
 
				+                    mismatch_keys.append(arg.name)
			
 
				+        return mismatch_keys
			
 
				+
			
 
				+    def validate(self):
			
 
				+        missing_keys = self.find_missing_keys()
			
 
				+        if missing_keys:
			
 
				+            raise ValueError("Missing param for class<{}>: {}".format(
			
 
				+                self.name, ", ".join(missing_keys)))
			
 
				+        extra_keys = self.find_extra_keys()
			
 
				+        if extra_keys and self.strict:
			
 
				+            raise ValueError("Extraneous param for class<{}>: {}".format(
			
 
				+                self.name, ", ".join(extra_keys)))
			
 
				+        mismatch_keys = self.find_mismatch_keys()
			
 
				+        if mismatch_keys:
			
 
				+            raise TypeError("Wrong param type for class<{}>: {}".format(
			
 
				+                self.name, ", ".join(mismatch_keys)))
			
 
				+
			
 
				+
			
 
				+class SharedConfig(object):
			
 
				+    """
			
 
				+    Representation class for `__shared__` annotations, which work as follows:
			
 
				+
			
 
				+    - if `key` is set for the module in config file, its value will take
			
 
				+      precedence
			
 
				+    - if `key` is not set for the module but present in the config file, its
			
 
				+      value will be used
			
 
				+    - otherwise, use the provided `default_value` as fallback
			
 
				+
			
 
				+    Args:
			
 
				+        key: config[key] will be injected
			
 
				+        default_value: fallback value
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, key, default_value=None):
			
 
				+        super(SharedConfig, self).__init__()
			
 
				+        self.key = key
			
 
				+        self.default_value = default_value
			
 
				+
			
 
				+
			
 
				+def extract_schema(cls):
			
 
				+    """
			
 
				+    Extract schema from a given class
			
 
				+
			
 
				+    Args:
			
 
				+        cls (type): Class from which to extract.
			
 
				+
			
 
				+    Returns:
			
 
				+        schema (SchemaDict): Extracted schema.
			
 
				+    """
			
 
				+    ctor = cls.__init__
			
 
				+    # python 2 compatibility
			
 
				+    if hasattr(inspect, 'getfullargspec'):
			
 
				+        argspec = inspect.getfullargspec(ctor)
			
 
				+        annotations = argspec.annotations
			
 
				+        has_kwargs = argspec.varkw is not None
			
 
				+    else:
			
 
				+        argspec = inspect.getfullargspec(ctor)
			
 
				+        # python 2 type hinting workaround, see pep-3107
			
 
				+        # however, since `typeguard` does not support python 2, type checking
			
 
				+        # is still python 3 only for now
			
 
				+        annotations = getattr(ctor, '__annotations__', {})
			
 
				+        has_kwargs = argspec.varkw is not None
			
 
				+
			
 
				+    names = [arg for arg in argspec.args if arg != 'self']
			
 
				+    defaults = argspec.defaults
			
 
				+    num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0
			
 
				+    num_required = len(names) - num_defaults
			
 
				+
			
 
				+    docs = cls.__doc__
			
 
				+    if docs is None and getattr(cls, '__category__', None) == 'op':
			
 
				+        docs = cls.__call__.__doc__
			
 
				+    try:
			
 
				+        docstring = doc_parse(docs)
			
 
				+    except Exception:
			
 
				+        docstring = None
			
 
				+
			
 
				+    if docstring is None:
			
 
				+        comments = {}
			
 
				+    else:
			
 
				+        comments = {}
			
 
				+        for p in docstring.params:
			
 
				+            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
			
 
				+            if match_obj is not None:
			
 
				+                comments[match_obj.group(1)] = p.description
			
 
				+
			
 
				+    schema = SchemaDict()
			
 
				+    schema.name = cls.__name__
			
 
				+    schema.doc = ""
			
 
				+    if docs is not None:
			
 
				+        start_pos = docs[0] == '\n' and 1 or 0
			
 
				+        schema.doc = docs[start_pos:].split("\n")[0].strip()
			
 
				+    # XXX handle paddle's weird doc convention
			
 
				+    if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
			
 
				+        schema.doc = schema.doc[2:-2].strip()
			
 
				+    schema.category = hasattr(cls, '__category__') and getattr(
			
 
				+        cls, '__category__') or 'module'
			
 
				+    schema.strict = not has_kwargs
			
 
				+    schema.pymodule = importlib.import_module(cls.__module__)
			
 
				+    schema.inject = getattr(cls, '__inject__', [])
			
 
				+    schema.shared = getattr(cls, '__shared__', [])
			
 
				+    for idx, name in enumerate(names):
			
 
				+        comment = name in comments and comments[name] or name
			
 
				+        if name in schema.inject:
			
 
				+            type_ = None
			
 
				+        else:
			
 
				+            type_ = name in annotations and annotations[name] or None
			
 
				+        value_schema = SchemaValue(name, comment, type_)
			
 
				+        if name in schema.shared:
			
 
				+            assert idx >= num_required, "shared config must have default value"
			
 
				+            default = defaults[idx - num_required]
			
 
				+            value_schema.set_default(SharedConfig(name, default))
			
 
				+        elif idx >= num_required:
			
 
				+            default = defaults[idx - num_required]
			
 
				+            value_schema.set_default(default)
			
 
				+        schema.set_schema(name, value_schema)
			
 
				+
			
 
				+    return schema
			
--- a/paddlers/models/ppdet/core/config/yaml_helpers.py
+++ b/paddlers/models/ppdet/core/config/yaml_helpers.py
@@ -0,0 +1,118 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import importlib
			
 
				+import inspect
			
 
				+
			
 
				+import yaml
			
 
				+from .schema import SharedConfig
			
 
				+
			
 
				+__all__ = ['serializable', 'Callable']
			
 
				+
			
 
				+
			
 
				+def represent_dictionary_order(self, dict_data):
			
 
				+    return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
			
 
				+
			
 
				+
			
 
				+def setup_orderdict():
			
 
				+    from collections import OrderedDict
			
 
				+    yaml.add_representer(OrderedDict, represent_dictionary_order)
			
 
				+
			
 
				+
			
 
				+def _make_python_constructor(cls):
			
 
				+    def python_constructor(loader, node):
			
 
				+        if isinstance(node, yaml.SequenceNode):
			
 
				+            args = loader.construct_sequence(node, deep=True)
			
 
				+            return cls(*args)
			
 
				+        else:
			
 
				+            kwargs = loader.construct_mapping(node, deep=True)
			
 
				+            try:
			
 
				+                return cls(**kwargs)
			
 
				+            except Exception as ex:
			
 
				+                print("Error when construct {} instance from yaml config".
			
 
				+                      format(cls.__name__))
			
 
				+                raise ex
			
 
				+
			
 
				+    return python_constructor
			
 
				+
			
 
				+
			
 
				+def _make_python_representer(cls):
			
 
				+    # python 2 compatibility
			
 
				+    if hasattr(inspect, 'getfullargspec'):
			
 
				+        argspec = inspect.getfullargspec(cls)
			
 
				+    else:
			
 
				+        argspec = inspect.getfullargspec(cls.__init__)
			
 
				+    argnames = [arg for arg in argspec.args if arg != 'self']
			
 
				+
			
 
				+    def python_representer(dumper, obj):
			
 
				+        if argnames:
			
 
				+            data = {name: getattr(obj, name) for name in argnames}
			
 
				+        else:
			
 
				+            data = obj.__dict__
			
 
				+        if '_id' in data:
			
 
				+            del data['_id']
			
 
				+        return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
			
 
				+
			
 
				+    return python_representer
			
 
				+
			
 
				+
			
 
				+def serializable(cls):
			
 
				+    """
			
 
				+    Add loader and dumper for given class, which must be
			
 
				+    "trivially serializable"
			
 
				+
			
 
				+    Args:
			
 
				+        cls: class to be serialized
			
 
				+
			
 
				+    Returns: cls
			
 
				+    """
			
 
				+    yaml.add_constructor(u'!{}'.format(cls.__name__),
			
 
				+                         _make_python_constructor(cls))
			
 
				+    yaml.add_representer(cls, _make_python_representer(cls))
			
 
				+    return cls
			
 
				+
			
 
				+
			
 
				+yaml.add_representer(SharedConfig,
			
 
				+                     lambda d, o: d.represent_data(o.default_value))
			
 
				+
			
 
				+
			
 
				+@serializable
			
 
				+class Callable(object):
			
 
				+    """
			
 
				+    Helper to be used in Yaml for creating arbitrary class objects
			
 
				+
			
 
				+    Args:
			
 
				+        full_type (str): the full module path to target function
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, full_type, args=[], kwargs={}):
			
 
				+        super(Callable, self).__init__()
			
 
				+        self.full_type = full_type
			
 
				+        self.args = args
			
 
				+        self.kwargs = kwargs
			
 
				+
			
 
				+    def __call__(self):
			
 
				+        if '.' in self.full_type:
			
 
				+            idx = self.full_type.rfind('.')
			
 
				+            module = importlib.import_module(self.full_type[:idx])
			
 
				+            func_name = self.full_type[idx + 1:]
			
 
				+        else:
			
 
				+            try:
			
 
				+                module = importlib.import_module('builtins')
			
 
				+            except Exception:
			
 
				+                module = importlib.import_module('__builtin__')
			
 
				+            func_name = self.full_type
			
 
				+
			
 
				+        func = getattr(module, func_name)
			
 
				+        return func(*self.args, **self.kwargs)
			
--- a/paddlers/models/ppdet/core/workspace.py
+++ b/paddlers/models/ppdet/core/workspace.py
@@ -0,0 +1,278 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import print_function
			
 
				+from __future__ import division
			
 
				+
			
 
				+import importlib
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+import yaml
			
 
				+import collections
			
 
				+
			
 
				+try:
			
 
				+    collectionsAbc = collections.abc
			
 
				+except AttributeError:
			
 
				+    collectionsAbc = collections
			
 
				+
			
 
				+from .config.schema import SchemaDict, SharedConfig, extract_schema
			
 
				+from .config.yaml_helpers import serializable
			
 
				+
			
 
				+__all__ = [
			
 
				+    'global_config',
			
 
				+    'load_config',
			
 
				+    'merge_config',
			
 
				+    'get_registered_modules',
			
 
				+    'create',
			
 
				+    'register',
			
 
				+    'serializable',
			
 
				+    'dump_value',
			
 
				+]
			
 
				+
			
 
				+
			
 
				+def dump_value(value):
			
 
				+    # XXX this is hackish, but collections.abc is not available in python 2
			
 
				+    if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
			
 
				+        value = yaml.dump(value, default_flow_style=True)
			
 
				+        value = value.replace('\n', '')
			
 
				+        value = value.replace('...', '')
			
 
				+        return "'{}'".format(value)
			
 
				+    else:
			
 
				+        # primitive types
			
 
				+        return str(value)
			
 
				+
			
 
				+
			
 
				+class AttrDict(dict):
			
 
				+    """Single level attribute dict, NOT recursive"""
			
 
				+
			
 
				+    def __init__(self, **kwargs):
			
 
				+        super(AttrDict, self).__init__()
			
 
				+        super(AttrDict, self).update(kwargs)
			
 
				+
			
 
				+    def __getattr__(self, key):
			
 
				+        if key in self:
			
 
				+            return self[key]
			
 
				+        raise AttributeError("object has no attribute '{}'".format(key))
			
 
				+
			
 
				+
			
 
				+global_config = AttrDict()
			
 
				+
			
 
				+BASE_KEY = '_BASE_'
			
 
				+
			
 
				+
			
 
				+# parse and load _BASE_ recursively
			
 
				+def _load_config_with_base(file_path):
			
 
				+    with open(file_path) as f:
			
 
				+        file_cfg = yaml.load(f, Loader=yaml.Loader)
			
 
				+
			
 
				+    # NOTE: cfgs outside have higher priority than cfgs in _BASE_
			
 
				+    if BASE_KEY in file_cfg:
			
 
				+        all_base_cfg = AttrDict()
			
 
				+        base_ymls = list(file_cfg[BASE_KEY])
			
 
				+        for base_yml in base_ymls:
			
 
				+            if base_yml.startswith("~"):
			
 
				+                base_yml = os.path.expanduser(base_yml)
			
 
				+            if not base_yml.startswith('/'):
			
 
				+                base_yml = os.path.join(os.path.dirname(file_path), base_yml)
			
 
				+
			
 
				+            with open(base_yml) as f:
			
 
				+                base_cfg = _load_config_with_base(base_yml)
			
 
				+                all_base_cfg = merge_config(base_cfg, all_base_cfg)
			
 
				+
			
 
				+        del file_cfg[BASE_KEY]
			
 
				+        return merge_config(file_cfg, all_base_cfg)
			
 
				+
			
 
				+    return file_cfg
			
 
				+
			
 
				+
			
 
				+def load_config(file_path):
			
 
				+    """
			
 
				+    Load config from file.
			
 
				+
			
 
				+    Args:
			
 
				+        file_path (str): Path of the config file to be loaded.
			
 
				+
			
 
				+    Returns: global config
			
 
				+    """
			
 
				+    _, ext = os.path.splitext(file_path)
			
 
				+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
			
 
				+
			
 
				+    # load config from file and merge into global config
			
 
				+    cfg = _load_config_with_base(file_path)
			
 
				+    cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0]
			
 
				+    merge_config(cfg)
			
 
				+
			
 
				+    return global_config
			
 
				+
			
 
				+
			
 
				+def dict_merge(dct, merge_dct):
			
 
				+    """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
			
 
				+    updating only top-level keys, dict_merge recurses down into dicts nested
			
 
				+    to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
			
 
				+    ``dct``.
			
 
				+
			
 
				+    Args:
			
 
				+        dct: dict onto which the merge is executed
			
 
				+        merge_dct: dct merged into dct
			
 
				+
			
 
				+    Returns: dct
			
 
				+    """
			
 
				+    for k, v in merge_dct.items():
			
 
				+        if (k in dct and isinstance(dct[k], dict) and
			
 
				+                isinstance(merge_dct[k], collectionsAbc.Mapping)):
			
 
				+            dict_merge(dct[k], merge_dct[k])
			
 
				+        else:
			
 
				+            dct[k] = merge_dct[k]
			
 
				+    return dct
			
 
				+
			
 
				+
			
 
				+def merge_config(config, another_cfg=None):
			
 
				+    """
			
 
				+    Merge config into global config or another_cfg.
			
 
				+
			
 
				+    Args:
			
 
				+        config (dict): Config to be merged.
			
 
				+
			
 
				+    Returns: global config
			
 
				+    """
			
 
				+    global global_config
			
 
				+    dct = another_cfg or global_config
			
 
				+    return dict_merge(dct, config)
			
 
				+
			
 
				+
			
 
				+def get_registered_modules():
			
 
				+    return {
			
 
				+        k: v
			
 
				+        for k, v in global_config.items() if isinstance(v, SchemaDict)
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def make_partial(cls):
			
 
				+    op_module = importlib.import_module(cls.__op__.__module__)
			
 
				+    op = getattr(op_module, cls.__op__.__name__)
			
 
				+    cls.__category__ = getattr(cls, '__category__', None) or 'op'
			
 
				+
			
 
				+    def partial_apply(self, *args, **kwargs):
			
 
				+        kwargs_ = self.__dict__.copy()
			
 
				+        kwargs_.update(kwargs)
			
 
				+        return op(*args, **kwargs_)
			
 
				+
			
 
				+    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
			
 
				+        if sys.version_info[0] > 2:
			
 
				+            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
			
 
				+            cls.__init__.__doc__ = op.__doc__
			
 
				+            cls.__call__ = partial_apply
			
 
				+            cls.__call__.__doc__ = op.__doc__
			
 
				+        else:
			
 
				+            # XXX work around for python 2
			
 
				+            partial_apply.__doc__ = op.__doc__
			
 
				+            cls.__call__ = partial_apply
			
 
				+    return cls
			
 
				+
			
 
				+
			
 
				+def register(cls):
			
 
				+    """
			
 
				+    Register a given module class.
			
 
				+
			
 
				+    Args:
			
 
				+        cls (type): Module class to be registered.
			
 
				+
			
 
				+    Returns: cls
			
 
				+    """
			
 
				+    if cls.__name__ in global_config:
			
 
				+        raise ValueError("Module class already registered: {}".format(
			
 
				+            cls.__name__))
			
 
				+    if hasattr(cls, '__op__'):
			
 
				+        cls = make_partial(cls)
			
 
				+    global_config[cls.__name__] = extract_schema(cls)
			
 
				+    return cls
			
 
				+
			
 
				+
			
 
				+def create(cls_or_name, **kwargs):
			
 
				+    """
			
 
				+    Create an instance of given module class.
			
 
				+
			
 
				+    Args:
			
 
				+        cls_or_name (type or str): Class of which to create instance.
			
 
				+
			
 
				+    Returns: instance of type `cls_or_name`
			
 
				+    """
			
 
				+    assert type(cls_or_name) in [type, str
			
 
				+                                 ], "should be a class or name of a class"
			
 
				+    name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
			
 
				+    assert name in global_config and \
			
 
				+        isinstance(global_config[name], SchemaDict), \
			
 
				+        "the module {} is not registered".format(name)
			
 
				+    config = global_config[name]
			
 
				+    cls = getattr(config.pymodule, name)
			
 
				+    cls_kwargs = {}
			
 
				+    cls_kwargs.update(global_config[name])
			
 
				+
			
 
				+    # parse `shared` annoation of registered modules
			
 
				+    if getattr(config, 'shared', None):
			
 
				+        for k in config.shared:
			
 
				+            target_key = config[k]
			
 
				+            shared_conf = config.schema[k].default
			
 
				+            assert isinstance(shared_conf, SharedConfig)
			
 
				+            if target_key is not None and not isinstance(target_key,
			
 
				+                                                         SharedConfig):
			
 
				+                continue  # value is given for the module
			
 
				+            elif shared_conf.key in global_config:
			
 
				+                # `key` is present in config
			
 
				+                cls_kwargs[k] = global_config[shared_conf.key]
			
 
				+            else:
			
 
				+                cls_kwargs[k] = shared_conf.default_value
			
 
				+
			
 
				+    # parse `inject` annoation of registered modules
			
 
				+    if getattr(cls, 'from_config', None):
			
 
				+        cls_kwargs.update(cls.from_config(config, **kwargs))
			
 
				+
			
 
				+    if getattr(config, 'inject', None):
			
 
				+        for k in config.inject:
			
 
				+            target_key = config[k]
			
 
				+            # optional dependency
			
 
				+            if target_key is None:
			
 
				+                continue
			
 
				+
			
 
				+            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
			
 
				+                if 'name' not in target_key.keys():
			
 
				+                    continue
			
 
				+                inject_name = str(target_key['name'])
			
 
				+                if inject_name not in global_config:
			
 
				+                    raise ValueError(
			
 
				+                        "Missing injection name {} and check it's name in cfg file".
			
 
				+                        format(k))
			
 
				+                target = global_config[inject_name]
			
 
				+                for i, v in target_key.items():
			
 
				+                    if i == 'name':
			
 
				+                        continue
			
 
				+                    target[i] = v
			
 
				+                if isinstance(target, SchemaDict):
			
 
				+                    cls_kwargs[k] = create(inject_name)
			
 
				+            elif isinstance(target_key, str):
			
 
				+                if target_key not in global_config:
			
 
				+                    raise ValueError("Missing injection config:", target_key)
			
 
				+                target = global_config[target_key]
			
 
				+                if isinstance(target, SchemaDict):
			
 
				+                    cls_kwargs[k] = create(target_key)
			
 
				+                elif hasattr(target, '__dict__'):  # serialized object
			
 
				+                    cls_kwargs[k] = target
			
 
				+            else:
			
 
				+                raise ValueError("Unsupported injection type:", target_key)
			
 
				+    # prevent modification of global config values of reference types
			
 
				+    # (e.g., list, dict) from within the created module instances
			
 
				+    #kwargs = copy.deepcopy(kwargs)
			
 
				+    return cls(**cls_kwargs)
			
--- a/paddlers/models/ppdet/data/__init__.py
+++ b/paddlers/models/ppdet/data/__init__.py
@@ -0,0 +1,21 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import source
			
 
				+from . import transform
			
 
				+from . import reader
			
 
				+
			
 
				+from .source import *
			
 
				+from .transform import *
			
 
				+from .reader import *
			
--- a/paddlers/models/ppdet/data/crop_utils/__init__.py
+++ b/paddlers/models/ppdet/data/crop_utils/__init__.py
@@ -0,0 +1,13 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
--- a/paddlers/models/ppdet/data/crop_utils/annotation_cropper.py
+++ b/paddlers/models/ppdet/data/crop_utils/annotation_cropper.py
@@ -0,0 +1,585 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import copy
			
 
				+import math
			
 
				+import random
			
 
				+import numpy as np
			
 
				+from copy import deepcopy
			
 
				+from typing import List, Tuple
			
 
				+from collections import defaultdict
			
 
				+
			
 
				+from .chip_box_utils import nms, transform_chip_boxes2image_boxes
			
 
				+from .chip_box_utils import find_chips_to_cover_overlaped_boxes
			
 
				+from .chip_box_utils import transform_chip_box
			
 
				+from .chip_box_utils import intersection_over_box
			
 
				+
			
 
				+
			
 
				+class AnnoCropper(object):
			
 
				+    def __init__(self,
			
 
				+                 image_target_sizes: List[int],
			
 
				+                 valid_box_ratio_ranges: List[List[float]],
			
 
				+                 chip_target_size: int,
			
 
				+                 chip_target_stride: int,
			
 
				+                 use_neg_chip: bool=False,
			
 
				+                 max_neg_num_per_im: int=8,
			
 
				+                 max_per_img: int=-1,
			
 
				+                 nms_thresh: int=0.5):
			
 
				+        """
			
 
				+        Generate chips by chip_target_size and chip_target_stride.
			
 
				+        These two parameters just like kernel_size and stride in cnn.
			
 
				+
			
 
				+        Each image has its raw size. After resizing, then get its target size.
			
 
				+        The resizing scale = target_size / raw_size.
			
 
				+        So are chips of the image.
			
 
				+        box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
			
 
				+        The 'size' above mentioned is the size of long-side of image, box or chip.
			
 
				+
			
 
				+        :param image_target_sizes: [2000, 1000]
			
 
				+        :param valid_box_ratio_ranges:  [[-1, 0.1],[0.08, -1]]
			
 
				+        :param chip_target_size: 500
			
 
				+        :param chip_target_stride: 200
			
 
				+        """
			
 
				+        self.target_sizes = image_target_sizes
			
 
				+        self.valid_box_ratio_ranges = valid_box_ratio_ranges
			
 
				+        assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
			
 
				+        self.scale_num = len(self.target_sizes)
			
 
				+        self.chip_target_size = chip_target_size  # is target size
			
 
				+        self.chip_target_stride = chip_target_stride  # is target stride
			
 
				+        self.use_neg_chip = use_neg_chip
			
 
				+        self.max_neg_num_per_im = max_neg_num_per_im
			
 
				+        self.max_per_img = max_per_img
			
 
				+        self.nms_thresh = nms_thresh
			
 
				+
			
 
				+    def crop_anno_records(self, records: List[dict]):
			
 
				+        """
			
 
				+        The main logic:
			
 
				+        # foreach record(image):
			
 
				+        #   foreach scale:
			
 
				+        #     1 generate chips by chip size and stride for each scale
			
 
				+        #     2 get pos chips
			
 
				+        #     - validate boxes: current scale; h,w >= 1
			
 
				+        #     - find pos chips greedily by valid gt boxes in each scale
			
 
				+        #     - for every valid gt box, find its corresponding pos chips in each scale
			
 
				+        #     3 get neg chips
			
 
				+        #     - If given proposals, find neg boxes in them which are not in pos chips
			
 
				+        #     - If got neg boxes in last step, we find neg chips and assign neg boxes to neg chips such as 2.
			
 
				+        # 4 sample neg chips if too much each image
			
 
				+        #   transform this image-scale annotations to chips(pos chips&neg chips) annotations
			
 
				+
			
 
				+        :param records, standard coco_record but with extra key `proposals`(Px4), which are predicted by stage1
			
 
				+                        model and maybe have neg boxes in them.
			
 
				+        :return: new_records, list of dict like
			
 
				+        {
			
 
				+            'im_file': 'fake_image1.jpg',
			
 
				+            'im_id': np.array([1]),  # new _global_chip_id as im_id
			
 
				+            'h': h,  # chip height
			
 
				+            'w': w,  # chip width
			
 
				+            'is_crowd': is_crowd,  # Nx1 -> Mx1
			
 
				+            'gt_class': gt_class,  # Nx1 -> Mx1
			
 
				+            'gt_bbox': gt_bbox,  # Nx4 -> Mx4, 4 represents [x1,y1,x2,y2]
			
 
				+            'gt_poly': gt_poly,  # [None]xN -> [None]xM
			
 
				+            'chip': [x1, y1, x2, y2]  # added
			
 
				+        }
			
 
				+
			
 
				+        Attention:
			
 
				+        ------------------------------>x
			
 
				+        |
			
 
				+        |    (x1,y1)------
			
 
				+        |       |        |
			
 
				+        |       |        |
			
 
				+        |       |        |
			
 
				+        |       |        |
			
 
				+        |       |        |
			
 
				+        |       ----------
			
 
				+        |                 (x2,y2)
			
 
				+        |
			
 
				+        ↓
			
 
				+        y
			
 
				+
			
 
				+        If we use [x1, y1, x2, y2] to represent boxes or chips,
			
 
				+        (x1,y1) is the left-top point which is in the box,
			
 
				+        but (x2,y2) is the right-bottom point which is not in the box.
			
 
				+        So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
			
 
				+        And you can use x2-x1 to get width, and you can use image[y1:y2, x1:x2] to get the box area.
			
 
				+        """
			
 
				+
			
 
				+        self.chip_records = []
			
 
				+        self._global_chip_id = 1
			
 
				+        for r in records:
			
 
				+            self._cur_im_pos_chips = [
			
 
				+            ]  # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_ids is List[int]
			
 
				+            self._cur_im_neg_chips = []  # element: (chip, neg_box_num)
			
 
				+            for scale_i in range(self.scale_num):
			
 
				+                self._get_current_scale_parameters(scale_i, r)
			
 
				+
			
 
				+                # Cx4
			
 
				+                chips = self._create_chips(r['h'], r['w'], self._cur_scale)
			
 
				+
			
 
				+                # # dict: chipid->[box_id, ...]
			
 
				+                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
			
 
				+                    r['gt_bbox'], chips)
			
 
				+
			
 
				+                # dict: chipid->neg_box_num
			
 
				+                neg_chip2box_num = self._get_neg_boxes_and_chips(
			
 
				+                    chips,
			
 
				+                    list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
			
 
				+
			
 
				+                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
			
 
				+                                          neg_chip2box_num)
			
 
				+
			
 
				+            cur_image_records = self._trans_all_chips2annotations(r)
			
 
				+            self.chip_records.extend(cur_image_records)
			
 
				+        return self.chip_records
			
 
				+
			
 
				+    def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx,
			
 
				+                             neg_chip2box_num):
			
 
				+        for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
			
 
				+            chip = np.array(chips[pos_chipid])  # copy chips slice
			
 
				+            self._cur_im_pos_chips.append((chip, boxes_idx))
			
 
				+
			
 
				+        if neg_chip2box_num is None:
			
 
				+            return
			
 
				+
			
 
				+        for neg_chipid, neg_box_num in neg_chip2box_num.items():
			
 
				+            chip = np.array(chips[neg_chipid])
			
 
				+            self._cur_im_neg_chips.append((chip, neg_box_num))
			
 
				+
			
 
				+    def _trans_all_chips2annotations(self, r):
			
 
				+        gt_bbox = r['gt_bbox']
			
 
				+        im_file = r['im_file']
			
 
				+        is_crowd = r['is_crowd']
			
 
				+        gt_class = r['gt_class']
			
 
				+        # gt_poly = r['gt_poly']   # [None]xN
			
 
				+        # remaining keys: im_id, h, w
			
 
				+        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
			
 
				+                                                         is_crowd, gt_class)
			
 
				+
			
 
				+        if not self.use_neg_chip:
			
 
				+            return chip_records
			
 
				+
			
 
				+        sampled_neg_chips = self._sample_neg_chips()
			
 
				+        neg_chip_records = self._trans_neg_chips2annotations(im_file,
			
 
				+                                                             sampled_neg_chips)
			
 
				+        chip_records.extend(neg_chip_records)
			
 
				+        return chip_records
			
 
				+
			
 
				+    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
			
 
				+                                     gt_class):
			
 
				+        chip_records = []
			
 
				+        for chip, boxes_idx in self._cur_im_pos_chips:
			
 
				+            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
			
 
				+                                                            chip)
			
 
				+            x1, y1, x2, y2 = chip
			
 
				+            chip_h = y2 - y1
			
 
				+            chip_w = x2 - x1
			
 
				+            rec = {
			
 
				+                'im_file': im_file,
			
 
				+                'im_id': np.array([self._global_chip_id]),
			
 
				+                'h': chip_h,
			
 
				+                'w': chip_w,
			
 
				+                'gt_bbox': chip_bbox,
			
 
				+                'is_crowd': is_crowd[final_boxes_idx].copy(),
			
 
				+                'gt_class': gt_class[final_boxes_idx].copy(),
			
 
				+                # 'gt_poly': [None] * len(final_boxes_idx),
			
 
				+                'chip': chip
			
 
				+            }
			
 
				+            self._global_chip_id += 1
			
 
				+            chip_records.append(rec)
			
 
				+        return chip_records
			
 
				+
			
 
				+    def _sample_neg_chips(self):
			
 
				+        pos_num = len(self._cur_im_pos_chips)
			
 
				+        neg_num = len(self._cur_im_neg_chips)
			
 
				+        sample_num = min(pos_num + 2, self.max_neg_num_per_im)
			
 
				+        assert sample_num >= 1
			
 
				+        if neg_num <= sample_num:
			
 
				+            return self._cur_im_neg_chips
			
 
				+
			
 
				+        candidate_num = int(sample_num * 1.5)
			
 
				+        candidate_neg_chips = sorted(
			
 
				+            self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
			
 
				+        random.shuffle(candidate_neg_chips)
			
 
				+        sampled_neg_chips = candidate_neg_chips[:sample_num]
			
 
				+        return sampled_neg_chips
			
 
				+
			
 
				+    def _trans_neg_chips2annotations(self,
			
 
				+                                     im_file: str,
			
 
				+                                     sampled_neg_chips: List[Tuple]):
			
 
				+        chip_records = []
			
 
				+        for chip, neg_box_num in sampled_neg_chips:
			
 
				+            x1, y1, x2, y2 = chip
			
 
				+            chip_h = y2 - y1
			
 
				+            chip_w = x2 - x1
			
 
				+            rec = {
			
 
				+                'im_file': im_file,
			
 
				+                'im_id': np.array([self._global_chip_id]),
			
 
				+                'h': chip_h,
			
 
				+                'w': chip_w,
			
 
				+                'gt_bbox': np.zeros(
			
 
				+                    (0, 4), dtype=np.float32),
			
 
				+                'is_crowd': np.zeros(
			
 
				+                    (0, 1), dtype=np.int32),
			
 
				+                'gt_class': np.zeros(
			
 
				+                    (0, 1), dtype=np.int32),
			
 
				+                # 'gt_poly': [],
			
 
				+                'chip': chip
			
 
				+            }
			
 
				+            self._global_chip_id += 1
			
 
				+            chip_records.append(rec)
			
 
				+        return chip_records
			
 
				+
			
 
				+    def _get_current_scale_parameters(self, scale_i, r):
			
 
				+        im_size = max(r['h'], r['w'])
			
 
				+        im_target_size = self.target_sizes[scale_i]
			
 
				+        self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
			
 
				+        self._cur_scale = self._get_current_scale(im_target_size, im_size)
			
 
				+        self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]
			
 
				+
			
 
				+    def _get_current_scale(self, im_target_size, im_size):
			
 
				+        return im_target_size / im_size
			
 
				+
			
 
				+    def _create_chips(self, h: int, w: int, scale: float):
			
 
				+        """
			
 
				+        Generate chips by chip_target_size and chip_target_stride.
			
 
				+        These two parameters just like kernel_size and stride in cnn.
			
 
				+        :return: chips, Cx4, xy in raw size dimension
			
 
				+        """
			
 
				+        chip_size = self.chip_target_size  # omit target for simplicity
			
 
				+        stride = self.chip_target_stride
			
 
				+        width = int(scale * w)
			
 
				+        height = int(scale * h)
			
 
				+        min_chip_location_diff = 20  # in target size
			
 
				+
			
 
				+        assert chip_size >= stride
			
 
				+        chip_overlap = chip_size - stride
			
 
				+        if (width - chip_overlap
			
 
				+            ) % stride > min_chip_location_diff:  # 不能被stride整除的部分比较大，则保留
			
 
				+            w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
			
 
				+        else:  # 不能被stride整除的部分比较小，则丢弃
			
 
				+            w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
			
 
				+        if (height - chip_overlap) % stride > min_chip_location_diff:
			
 
				+            h_steps = max(1, int(math.ceil((height - chip_overlap) / stride)))
			
 
				+        else:
			
 
				+            h_steps = max(1, int(math.floor((height - chip_overlap) / stride)))
			
 
				+
			
 
				+        chips = list()
			
 
				+        for j in range(h_steps):
			
 
				+            for i in range(w_steps):
			
 
				+                x1 = i * stride
			
 
				+                y1 = j * stride
			
 
				+                x2 = min(x1 + chip_size, width)
			
 
				+                y2 = min(y1 + chip_size, height)
			
 
				+                chips.append([x1, y1, x2, y2])
			
 
				+
			
 
				+        # check  chip size
			
 
				+        for item in chips:
			
 
				+            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
			
 
				+                    1] > chip_size * 1.1:
			
 
				+                raise ValueError(item)
			
 
				+        chips = np.array(chips, dtype=np.float)
			
 
				+
			
 
				+        raw_size_chips = chips / scale
			
 
				+        return raw_size_chips
			
 
				+
			
 
				+    def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
			
 
				+        valid_ratio_range = self._cur_valid_ratio_range
			
 
				+        im_size = self._cur_im_size
			
 
				+        scale = self._cur_scale
			
 
				+        #   Nx4            N
			
 
				+        valid_boxes, valid_boxes_idx = self._validate_boxes(
			
 
				+            valid_ratio_range, im_size, gt_bbox, scale)
			
 
				+        # dict: chipid->[box_id, ...]
			
 
				+        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
			
 
				+                                                  valid_boxes_idx)
			
 
				+        return pos_chip2boxes_idx
			
 
				+
			
 
				+    def _validate_boxes(self,
			
 
				+                        valid_ratio_range: List[float],
			
 
				+                        im_size: int,
			
 
				+                        gt_boxes: 'np.array of Nx4',
			
 
				+                        scale: float):
			
 
				+        """
			
 
				+        :return: valid_boxes: Nx4, valid_boxes_idx: N
			
 
				+        """
			
 
				+        ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
			
 
				+        hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
			
 
				+        maxs = np.maximum(ws, hs)
			
 
				+        box_ratio = maxs / im_size
			
 
				+        mins = np.minimum(ws, hs)
			
 
				+        target_mins = mins * scale
			
 
				+
			
 
				+        low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
			
 
				+        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
			
 
				+            np.float).max
			
 
				+
			
 
				+        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) &
			
 
				+                                     (target_mins >= 2))[0]
			
 
				+        valid_boxes = gt_boxes[valid_boxes_idx]
			
 
				+        return valid_boxes, valid_boxes_idx
			
 
				+
			
 
				+    def _find_pos_chips(self,
			
 
				+                        chips: 'Cx4',
			
 
				+                        valid_boxes: 'Bx4',
			
 
				+                        valid_boxes_idx: 'B'):
			
 
				+        """
			
 
				+        :return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
			
 
				+        """
			
 
				+        iob = intersection_over_box(chips, valid_boxes)  # overlap, CxB
			
 
				+
			
 
				+        iob_threshold_to_find_chips = 1.
			
 
				+        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
			
 
				+            iob, iob_threshold_to_find_chips)
			
 
				+        pos_chip_ids = set(pos_chip_ids)
			
 
				+
			
 
				+        iob_threshold_to_assign_box = 0.5
			
 
				+        pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
			
 
				+            iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
			
 
				+        return pos_chip2boxes_idx
			
 
				+
			
 
				+    def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
			
 
				+        return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)
			
 
				+
			
 
				+    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
			
 
				+                                   valid_boxes_idx):
			
 
				+        chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
			
 
				+        pos_chip2boxes_idx = defaultdict(list)
			
 
				+        for chip_id, box_id in zip(chip_ids, box_ids):
			
 
				+            if chip_id not in pos_chip_ids:
			
 
				+                continue
			
 
				+            raw_gt_box_idx = valid_boxes_idx[box_id]
			
 
				+            pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
			
 
				+        return pos_chip2boxes_idx
			
 
				+
			
 
				+    def _get_neg_boxes_and_chips(self,
			
 
				+                                 chips: 'Cx4',
			
 
				+                                 pos_chip_ids: 'D',
			
 
				+                                 proposals: 'Px4'):
			
 
				+        """
			
 
				+        :param chips:
			
 
				+        :param pos_chip_ids:
			
 
				+        :param proposals:
			
 
				+        :return: neg_chip2box_num, None or dict: chipid->neg_box_num
			
 
				+        """
			
 
				+        if not self.use_neg_chip:
			
 
				+            return None
			
 
				+
			
 
				+        # train proposals maybe None
			
 
				+        if proposals is None or len(proposals) < 1:
			
 
				+            return None
			
 
				+
			
 
				+        valid_ratio_range = self._cur_valid_ratio_range
			
 
				+        im_size = self._cur_im_size
			
 
				+        scale = self._cur_scale
			
 
				+
			
 
				+        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
			
 
				+                                              proposals, scale)
			
 
				+        neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
			
 
				+        neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
			
 
				+        return neg_chip2box_num
			
 
				+
			
 
				+    def _find_neg_boxes(self,
			
 
				+                        chips: 'Cx4',
			
 
				+                        pos_chip_ids: 'D',
			
 
				+                        valid_props: 'Px4'):
			
 
				+        """
			
 
				+        :return: neg_boxes: Nx4
			
 
				+        """
			
 
				+        if len(pos_chip_ids) == 0:
			
 
				+            return valid_props
			
 
				+
			
 
				+        pos_chips = chips[pos_chip_ids]
			
 
				+        iob = intersection_over_box(pos_chips, valid_props)
			
 
				+        overlap_per_prop = np.max(iob, axis=0)
			
 
				+        non_overlap_props_idx = overlap_per_prop < 0.5
			
 
				+        neg_boxes = valid_props[non_overlap_props_idx]
			
 
				+        return neg_boxes
			
 
				+
			
 
				+    def _find_neg_chips(self,
			
 
				+                        chips: 'Cx4',
			
 
				+                        pos_chip_ids: 'D',
			
 
				+                        neg_boxes: 'Nx4'):
			
 
				+        """
			
 
				+        :return: neg_chip2box_num, dict: chipid->neg_box_num
			
 
				+        """
			
 
				+        neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
			
 
				+        neg_chips = chips[neg_chip_ids]
			
 
				+
			
 
				+        iob = intersection_over_box(neg_chips, neg_boxes)
			
 
				+        iob_threshold_to_find_chips = 0.7
			
 
				+        chosen_neg_chip_ids, chip_id2overlap_box_num = \
			
 
				+            self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)
			
 
				+
			
 
				+        neg_chipid2box_num = {}
			
 
				+        for cid in chosen_neg_chip_ids:
			
 
				+            box_num = chip_id2overlap_box_num[cid]
			
 
				+            raw_chip_id = neg_chip_ids[cid]
			
 
				+            neg_chipid2box_num[raw_chip_id] = box_num
			
 
				+        return neg_chipid2box_num
			
 
				+
			
 
				+    def crop_infer_anno_records(self, records: List[dict]):
			
 
				+        """
			
 
				+        transform image record to chips record
			
 
				+        :param records:
			
 
				+        :return: new_records, list of dict like
			
 
				+        {
			
 
				+            'im_file': 'fake_image1.jpg',
			
 
				+            'im_id': np.array([1]),  # new _global_chip_id as im_id
			
 
				+            'h': h,  # chip height
			
 
				+            'w': w,  # chip width
			
 
				+            'chip': [x1, y1, x2, y2]  # added
			
 
				+            'ori_im_h': ori_im_h  # added, origin image height
			
 
				+            'ori_im_w': ori_im_w  # added, origin image width
			
 
				+            'scale_i': 0  # added,
			
 
				+        }
			
 
				+        """
			
 
				+        self.chip_records = []
			
 
				+        self._global_chip_id = 1  # im_id start from 1
			
 
				+        self._global_chip_id2img_id = {}
			
 
				+
			
 
				+        for r in records:
			
 
				+            for scale_i in range(self.scale_num):
			
 
				+                self._get_current_scale_parameters(scale_i, r)
			
 
				+                # Cx4
			
 
				+                chips = self._create_chips(r['h'], r['w'], self._cur_scale)
			
 
				+                cur_img_chip_record = self._get_chips_records(r, chips,
			
 
				+                                                              scale_i)
			
 
				+                self.chip_records.extend(cur_img_chip_record)
			
 
				+
			
 
				+        return self.chip_records
			
 
				+
			
 
				+    def _get_chips_records(self, rec, chips, scale_i):
			
 
				+        cur_img_chip_records = []
			
 
				+        ori_im_h = rec["h"]
			
 
				+        ori_im_w = rec["w"]
			
 
				+        im_file = rec["im_file"]
			
 
				+        ori_im_id = rec["im_id"]
			
 
				+        for id, chip in enumerate(chips):
			
 
				+            chip_rec = {}
			
 
				+            x1, y1, x2, y2 = chip
			
 
				+            chip_h = y2 - y1
			
 
				+            chip_w = x2 - x1
			
 
				+            chip_rec["im_file"] = im_file
			
 
				+            chip_rec["im_id"] = self._global_chip_id
			
 
				+            chip_rec["h"] = chip_h
			
 
				+            chip_rec["w"] = chip_w
			
 
				+            chip_rec["chip"] = chip
			
 
				+            chip_rec["ori_im_h"] = ori_im_h
			
 
				+            chip_rec["ori_im_w"] = ori_im_w
			
 
				+            chip_rec["scale_i"] = scale_i
			
 
				+
			
 
				+            self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
			
 
				+            self._global_chip_id += 1
			
 
				+            cur_img_chip_records.append(chip_rec)
			
 
				+
			
 
				+        return cur_img_chip_records
			
 
				+
			
 
				+    def aggregate_chips_detections(self, results, records=None):
			
 
				+        """
			
 
				+        # 1. transform chip dets to image dets
			
 
				+        # 2. nms boxes per image;
			
 
				+        # 3. format output results
			
 
				+        :param results:
			
 
				+        :param roidb:
			
 
				+        :return:
			
 
				+        """
			
 
				+        results = deepcopy(results)
			
 
				+        records = records if records else self.chip_records
			
 
				+        img_id2bbox = self._transform_chip2image_bboxes(results, records)
			
 
				+        nms_img_id2bbox = self._nms_dets(img_id2bbox)
			
 
				+        aggregate_results = self._reformat_results(nms_img_id2bbox)
			
 
				+        return aggregate_results
			
 
				+
			
 
				+    def _transform_chip2image_bboxes(self, results, records):
			
 
				+        # 1. Transform chip dets to image dets;
			
 
				+        # 2. Filter valid range;
			
 
				+        # 3. Reformat and Aggregate chip dets to Get scale_cls_dets
			
 
				+        img_id2bbox = defaultdict(list)
			
 
				+        for result in results:
			
 
				+            bbox_locs = result['bbox']
			
 
				+            bbox_nums = result['bbox_num']
			
 
				+            if len(bbox_locs) == 1 and bbox_locs[0][
			
 
				+                    0] == -1:  # current batch has no detections
			
 
				+                # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
			
 
				+                # MultiClassNMS output: If there is no detected boxes for all images, lod will be set to {1} and Out only contains one value which is -1.
			
 
				+                continue
			
 
				+            im_ids = result['im_id']  # replace with range(len(bbox_nums))
			
 
				+
			
 
				+            last_bbox_num = 0
			
 
				+            for idx, im_id in enumerate(im_ids):
			
 
				+
			
 
				+                cur_bbox_len = bbox_nums[idx]
			
 
				+                bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
			
 
				+                last_bbox_num += cur_bbox_len
			
 
				+                # box: [num_id, score, xmin, ymin, xmax, ymax]
			
 
				+                if len(bboxes) == 0:  # current image has no detections
			
 
				+                    continue
			
 
				+
			
 
				+                chip_rec = records[int(im_id) -
			
 
				+                                   1]  # im_id starts from 1, type is np.int64
			
 
				+                image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])
			
 
				+
			
 
				+                bboxes = transform_chip_boxes2image_boxes(
			
 
				+                    bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
			
 
				+                    chip_rec["ori_im_w"])
			
 
				+
			
 
				+                scale_i = chip_rec["scale_i"]
			
 
				+                cur_scale = self._get_current_scale(self.target_sizes[scale_i],
			
 
				+                                                    image_size)
			
 
				+                _, valid_boxes_idx = self._validate_boxes(
			
 
				+                    self.valid_box_ratio_ranges[scale_i], image_size,
			
 
				+                    bboxes[:, 2:], cur_scale)
			
 
				+                ori_img_id = self._global_chip_id2img_id[int(im_id)]
			
 
				+
			
 
				+                img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])
			
 
				+
			
 
				+        return img_id2bbox
			
 
				+
			
 
				+    def _nms_dets(self, img_id2bbox):
			
 
				+        # 1. NMS on each image-class
			
 
				+        # 2. Limit number of detections to MAX_PER_IMAGE if requested
			
 
				+        max_per_img = self.max_per_img
			
 
				+        nms_thresh = self.nms_thresh
			
 
				+
			
 
				+        for img_id in img_id2bbox:
			
 
				+            box = img_id2bbox[
			
 
				+                img_id]  # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
			
 
				+            box = np.concatenate(box, axis=0)
			
 
				+            nms_dets = nms(box, nms_thresh)
			
 
				+            if max_per_img > 0:
			
 
				+                if len(nms_dets) > max_per_img:
			
 
				+                    keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
			
 
				+                    nms_dets = nms_dets[keep]
			
 
				+
			
 
				+            img_id2bbox[img_id] = nms_dets
			
 
				+
			
 
				+        return img_id2bbox
			
 
				+
			
 
				+    def _reformat_results(self, img_id2bbox):
			
 
				+        """reformat results"""
			
 
				+        im_ids = img_id2bbox.keys()
			
 
				+        results = []
			
 
				+        for img_id in im_ids:  # output by original im_id order
			
 
				+            if len(img_id2bbox[img_id]) == 0:
			
 
				+                bbox = np.array(
			
 
				+                    [[-1., 0., 0., 0., 0., 0.]])  # edge case: no detections
			
 
				+                bbox_num = np.array([0])
			
 
				+            else:
			
 
				+                # np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
			
 
				+                bbox = img_id2bbox[img_id]
			
 
				+                bbox_num = np.array([len(bbox)])
			
 
				+            res = dict(
			
 
				+                im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
			
 
				+            results.append(res)
			
 
				+        return results
			
--- a/paddlers/models/ppdet/data/crop_utils/chip_box_utils.py
+++ b/paddlers/models/ppdet/data/crop_utils/chip_box_utils.py
@@ -0,0 +1,170 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+def bbox_area(boxes):
			
 
				+    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
			
 
				+
			
 
				+
			
 
				+def intersection_over_box(chips, boxes):
			
 
				+    """
			
 
				+    intersection area over box area
			
 
				+    :param chips:  C
			
 
				+    :param boxes:  B
			
 
				+    :return: iob, CxB
			
 
				+    """
			
 
				+    M = chips.shape[0]
			
 
				+    N = boxes.shape[0]
			
 
				+    if M * N == 0:
			
 
				+        return np.zeros([M, N], dtype='float32')
			
 
				+
			
 
				+    box_area = bbox_area(boxes)  # B
			
 
				+
			
 
				+    inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:],
			
 
				+                            boxes[:, 2:])  # CxBX2
			
 
				+    inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2],
			
 
				+                            boxes[:, :2])  # CxBx2
			
 
				+    inter_wh = inter_x2y2 - inter_x1y1
			
 
				+    inter_wh = np.clip(inter_wh, a_min=0, a_max=None)
			
 
				+    inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1]  # CxB
			
 
				+
			
 
				+    iob = inter_area / np.expand_dims(box_area, 0)
			
 
				+    return iob
			
 
				+
			
 
				+
			
 
				+def clip_boxes(boxes, im_shape):
			
 
				+    """
			
 
				+    Clip boxes to image boundaries.
			
 
				+    :param boxes: [N, 4]
			
 
				+    :param im_shape: tuple of 2, [h, w]
			
 
				+    :return: [N, 4]
			
 
				+    """
			
 
				+    # x1 >= 0
			
 
				+    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
			
 
				+    # y1 >= 0
			
 
				+    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
			
 
				+    # x2 < im_shape[1]
			
 
				+    boxes[:, 2] = np.clip(boxes[:, 2], 1, im_shape[1])
			
 
				+    # y2 < im_shape[0]
			
 
				+    boxes[:, 3] = np.clip(boxes[:, 3], 1, im_shape[0])
			
 
				+    return boxes
			
 
				+
			
 
				+
			
 
				+def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
			
 
				+    boxes_idx = np.array(boxes_idx)
			
 
				+    cur_gt_bbox = gt_bbox[boxes_idx].copy()  # Bx4
			
 
				+    x1, y1, x2, y2 = chip
			
 
				+    cur_gt_bbox[:, 0] -= x1
			
 
				+    cur_gt_bbox[:, 1] -= y1
			
 
				+    cur_gt_bbox[:, 2] -= x1
			
 
				+    cur_gt_bbox[:, 3] -= y1
			
 
				+    h = y2 - y1
			
 
				+    w = x2 - x1
			
 
				+    cur_gt_bbox = clip_boxes(cur_gt_bbox, (h, w))
			
 
				+    ws = (cur_gt_bbox[:, 2] - cur_gt_bbox[:, 0]).astype(np.int32)
			
 
				+    hs = (cur_gt_bbox[:, 3] - cur_gt_bbox[:, 1]).astype(np.int32)
			
 
				+    valid_idx = (ws >= 2) & (hs >= 2)
			
 
				+    return cur_gt_bbox[valid_idx], boxes_idx[valid_idx]
			
 
				+
			
 
				+
			
 
				+def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
			
 
				+    chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
			
 
				+    chip_id2overlap_box_num = np.bincount(chip_ids)  # 1d array
			
 
				+    chip_id2overlap_box_num = np.pad(
			
 
				+        chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
			
 
				+        constant_values=0)
			
 
				+
			
 
				+    chosen_chip_ids = []
			
 
				+    while len(box_ids) > 0:
			
 
				+        value_counts = np.bincount(chip_ids)  # 1d array
			
 
				+        max_count_chip_id = np.argmax(value_counts)
			
 
				+        assert max_count_chip_id not in chosen_chip_ids
			
 
				+        chosen_chip_ids.append(max_count_chip_id)
			
 
				+
			
 
				+        box_ids_in_cur_chip = box_ids[chip_ids == max_count_chip_id]
			
 
				+        ids_not_in_cur_boxes_mask = np.logical_not(
			
 
				+            np.isin(box_ids, box_ids_in_cur_chip))
			
 
				+        chip_ids = chip_ids[ids_not_in_cur_boxes_mask]
			
 
				+        box_ids = box_ids[ids_not_in_cur_boxes_mask]
			
 
				+    return chosen_chip_ids, chip_id2overlap_box_num
			
 
				+
			
 
				+
			
 
				+def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
			
 
				+    chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
			
 
				+    xmin, ymin, _, _ = chip
			
 
				+    # Transform to origin image loc
			
 
				+    chip_boxes[:, 2] += xmin
			
 
				+    chip_boxes[:, 4] += xmin
			
 
				+    chip_boxes[:, 3] += ymin
			
 
				+    chip_boxes[:, 5] += ymin
			
 
				+    chip_boxes = clip_boxes(chip_boxes, (img_h, img_w))
			
 
				+    return chip_boxes
			
 
				+
			
 
				+
			
 
				+def nms(dets, thresh):
			
 
				+    """Apply classic DPM-style greedy NMS."""
			
 
				+    if dets.shape[0] == 0:
			
 
				+        return dets[[], :]
			
 
				+    scores = dets[:, 1]
			
 
				+    x1 = dets[:, 2]
			
 
				+    y1 = dets[:, 3]
			
 
				+    x2 = dets[:, 4]
			
 
				+    y2 = dets[:, 5]
			
 
				+
			
 
				+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
			
 
				+    order = scores.argsort()[::-1]
			
 
				+
			
 
				+    ndets = dets.shape[0]
			
 
				+    suppressed = np.zeros((ndets), dtype=np.int)
			
 
				+
			
 
				+    # nominal indices
			
 
				+    # _i, _j
			
 
				+    # sorted indices
			
 
				+    # i, j
			
 
				+    # temp variables for box i's (the box currently under consideration)
			
 
				+    # ix1, iy1, ix2, iy2, iarea
			
 
				+
			
 
				+    # variables for computing overlap with box j (lower scoring box)
			
 
				+    # xx1, yy1, xx2, yy2
			
 
				+    # w, h
			
 
				+    # inter, ovr
			
 
				+
			
 
				+    for _i in range(ndets):
			
 
				+        i = order[_i]
			
 
				+        if suppressed[i] == 1:
			
 
				+            continue
			
 
				+        ix1 = x1[i]
			
 
				+        iy1 = y1[i]
			
 
				+        ix2 = x2[i]
			
 
				+        iy2 = y2[i]
			
 
				+        iarea = areas[i]
			
 
				+        for _j in range(_i + 1, ndets):
			
 
				+            j = order[_j]
			
 
				+            if suppressed[j] == 1:
			
 
				+                continue
			
 
				+            xx1 = max(ix1, x1[j])
			
 
				+            yy1 = max(iy1, y1[j])
			
 
				+            xx2 = min(ix2, x2[j])
			
 
				+            yy2 = min(iy2, y2[j])
			
 
				+            w = max(0.0, xx2 - xx1 + 1)
			
 
				+            h = max(0.0, yy2 - yy1 + 1)
			
 
				+            inter = w * h
			
 
				+            ovr = inter / (iarea + areas[j] - inter)
			
 
				+            if ovr >= thresh:
			
 
				+                suppressed[j] = 1
			
 
				+    keep = np.where(suppressed == 0)[0]
			
 
				+    dets = dets[keep, :]
			
 
				+    return dets
			
--- a/paddlers/models/ppdet/data/reader.py
+++ b/paddlers/models/ppdet/data/reader.py
@@ -0,0 +1,302 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import traceback
			
 
				+import six
			
 
				+import sys
			
 
				+if sys.version_info >= (3, 0):
			
 
				+    pass
			
 
				+else:
			
 
				+    pass
			
 
				+import numpy as np
			
 
				+
			
 
				+from paddle.io import DataLoader, DistributedBatchSampler
			
 
				+from paddle.fluid.dataloader.collate import default_collate_fn
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from . import transform
			
 
				+from .shm_utils import _get_shared_memory_size_in_M
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger('reader')
			
 
				+
			
 
				+MAIN_PID = os.getpid()
			
 
				+
			
 
				+
			
 
				+class Compose(object):
			
 
				+    def __init__(self, transforms, num_classes=80):
			
 
				+        self.transforms = transforms
			
 
				+        self.transforms_cls = []
			
 
				+        for t in self.transforms:
			
 
				+            for k, v in t.items():
			
 
				+                op_cls = getattr(transform, k)
			
 
				+                f = op_cls(**v)
			
 
				+                if hasattr(f, 'num_classes'):
			
 
				+                    f.num_classes = num_classes
			
 
				+
			
 
				+                self.transforms_cls.append(f)
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        for f in self.transforms_cls:
			
 
				+            try:
			
 
				+                data = f(data)
			
 
				+            except Exception as e:
			
 
				+                stack_info = traceback.format_exc()
			
 
				+                logger.warning("fail to map sample transform [{}] "
			
 
				+                               "with error: {} and stack:\n{}".format(
			
 
				+                                   f, e, str(stack_info)))
			
 
				+                raise e
			
 
				+
			
 
				+        return data
			
 
				+
			
 
				+
			
 
				+class BatchCompose(Compose):
			
 
				+    def __init__(self, transforms, num_classes=80, collate_batch=True):
			
 
				+        super(BatchCompose, self).__init__(transforms, num_classes)
			
 
				+        self.collate_batch = collate_batch
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        for f in self.transforms_cls:
			
 
				+            try:
			
 
				+                data = f(data)
			
 
				+            except Exception as e:
			
 
				+                stack_info = traceback.format_exc()
			
 
				+                logger.warning("fail to map batch transform [{}] "
			
 
				+                               "with error: {} and stack:\n{}".format(
			
 
				+                                   f, e, str(stack_info)))
			
 
				+                raise e
			
 
				+
			
 
				+        # remove keys which is not needed by model
			
 
				+        extra_key = ['h', 'w', 'flipped']
			
 
				+        for k in extra_key:
			
 
				+            for sample in data:
			
 
				+                if k in sample:
			
 
				+                    sample.pop(k)
			
 
				+
			
 
				+        # batch data, if user-define batch function needed
			
 
				+        # use user-defined here
			
 
				+        if self.collate_batch:
			
 
				+            batch_data = default_collate_fn(data)
			
 
				+        else:
			
 
				+            batch_data = {}
			
 
				+            for k in data[0].keys():
			
 
				+                tmp_data = []
			
 
				+                for i in range(len(data)):
			
 
				+                    tmp_data.append(data[i][k])
			
 
				+                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
			
 
				+                    tmp_data = np.stack(tmp_data, axis=0)
			
 
				+                batch_data[k] = tmp_data
			
 
				+        return batch_data
			
 
				+
			
 
				+
			
 
				+class BaseDataLoader(object):
			
 
				+    """
			
 
				+    Base DataLoader implementation for detection models
			
 
				+
			
 
				+    Args:
			
 
				+        sample_transforms (list): a list of transforms to perform
			
 
				+                                  on each sample
			
 
				+        batch_transforms (list): a list of transforms to perform
			
 
				+                                 on batch
			
 
				+        batch_size (int): batch size for batch collating, default 1.
			
 
				+        shuffle (bool): whether to shuffle samples
			
 
				+        drop_last (bool): whether to drop the last incomplete,
			
 
				+                          default False
			
 
				+        num_classes (int): class number of dataset, default 80
			
 
				+        collate_batch (bool): whether to collate batch in dataloader.
			
 
				+            If set to True, the samples will collate into batch according
			
 
				+            to the batch size. Otherwise, the ground-truth will not collate,
			
 
				+            which is used when the number of ground-truch is different in
			
 
				+            samples.
			
 
				+        use_shared_memory (bool): whether to use shared memory to
			
 
				+                accelerate data loading, enable this only if you
			
 
				+                are sure that the shared memory size of your OS
			
 
				+                is larger than memory cost of input datas of model.
			
 
				+                Note that shared memory will be automatically
			
 
				+                disabled if the shared memory of OS is less than
			
 
				+                1G, which is not enough for detection models.
			
 
				+                Default False.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=False,
			
 
				+                 drop_last=False,
			
 
				+                 num_classes=80,
			
 
				+                 collate_batch=True,
			
 
				+                 use_shared_memory=False,
			
 
				+                 **kwargs):
			
 
				+        # sample transform
			
 
				+        self._sample_transforms = Compose(
			
 
				+            sample_transforms, num_classes=num_classes)
			
 
				+
			
 
				+        # batch transfrom
			
 
				+        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
			
 
				+                                              collate_batch)
			
 
				+        self.batch_size = batch_size
			
 
				+        self.shuffle = shuffle
			
 
				+        self.drop_last = drop_last
			
 
				+        self.use_shared_memory = use_shared_memory
			
 
				+        self.kwargs = kwargs
			
 
				+
			
 
				+    def __call__(self,
			
 
				+                 dataset,
			
 
				+                 worker_num,
			
 
				+                 batch_sampler=None,
			
 
				+                 return_list=False):
			
 
				+        self.dataset = dataset
			
 
				+        self.dataset.check_or_download_dataset()
			
 
				+        self.dataset.parse_dataset()
			
 
				+        # get data
			
 
				+        self.dataset.set_transform(self._sample_transforms)
			
 
				+        # set kwargs
			
 
				+        self.dataset.set_kwargs(**self.kwargs)
			
 
				+        # batch sampler
			
 
				+        if batch_sampler is None:
			
 
				+            self._batch_sampler = DistributedBatchSampler(
			
 
				+                self.dataset,
			
 
				+                batch_size=self.batch_size,
			
 
				+                shuffle=self.shuffle,
			
 
				+                drop_last=self.drop_last)
			
 
				+        else:
			
 
				+            self._batch_sampler = batch_sampler
			
 
				+
			
 
				+        # DataLoader do not start sub-process in Windows and Mac
			
 
				+        # system, do not need to use shared memory
			
 
				+        use_shared_memory = self.use_shared_memory and \
			
 
				+                            sys.platform not in ['win32', 'darwin']
			
 
				+        # check whether shared memory size is bigger than 1G(1024M)
			
 
				+        if use_shared_memory:
			
 
				+            shm_size = _get_shared_memory_size_in_M()
			
 
				+            if shm_size is not None and shm_size < 1024.:
			
 
				+                logger.warning("Shared memory size is less than 1G, "
			
 
				+                               "disable shared_memory in DataLoader")
			
 
				+                use_shared_memory = False
			
 
				+
			
 
				+        self.dataloader = DataLoader(
			
 
				+            dataset=self.dataset,
			
 
				+            batch_sampler=self._batch_sampler,
			
 
				+            collate_fn=self._batch_transforms,
			
 
				+            num_workers=worker_num,
			
 
				+            return_list=return_list,
			
 
				+            use_shared_memory=use_shared_memory)
			
 
				+        self.loader = iter(self.dataloader)
			
 
				+
			
 
				+        return self
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return len(self._batch_sampler)
			
 
				+
			
 
				+    def __iter__(self):
			
 
				+        return self
			
 
				+
			
 
				+    def __next__(self):
			
 
				+        try:
			
 
				+            return next(self.loader)
			
 
				+        except StopIteration:
			
 
				+            self.loader = iter(self.dataloader)
			
 
				+            six.reraise(*sys.exc_info())
			
 
				+
			
 
				+    def next(self):
			
 
				+        # python2 compatibility
			
 
				+        return self.__next__()
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TrainReader(BaseDataLoader):
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=True,
			
 
				+                 drop_last=True,
			
 
				+                 num_classes=80,
			
 
				+                 collate_batch=True,
			
 
				+                 **kwargs):
			
 
				+        super(TrainReader, self).__init__(sample_transforms, batch_transforms,
			
 
				+                                          batch_size, shuffle, drop_last,
			
 
				+                                          num_classes, collate_batch, **kwargs)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class EvalReader(BaseDataLoader):
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=False,
			
 
				+                 drop_last=True,
			
 
				+                 num_classes=80,
			
 
				+                 **kwargs):
			
 
				+        super(EvalReader, self).__init__(sample_transforms, batch_transforms,
			
 
				+                                         batch_size, shuffle, drop_last,
			
 
				+                                         num_classes, **kwargs)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TestReader(BaseDataLoader):
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=False,
			
 
				+                 drop_last=False,
			
 
				+                 num_classes=80,
			
 
				+                 **kwargs):
			
 
				+        super(TestReader, self).__init__(sample_transforms, batch_transforms,
			
 
				+                                         batch_size, shuffle, drop_last,
			
 
				+                                         num_classes, **kwargs)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class EvalMOTReader(BaseDataLoader):
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=False,
			
 
				+                 drop_last=False,
			
 
				+                 num_classes=1,
			
 
				+                 **kwargs):
			
 
				+        super(EvalMOTReader, self).__init__(
			
 
				+            sample_transforms, batch_transforms, batch_size, shuffle,
			
 
				+            drop_last, num_classes, **kwargs)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TestMOTReader(BaseDataLoader):
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 sample_transforms=[],
			
 
				+                 batch_transforms=[],
			
 
				+                 batch_size=1,
			
 
				+                 shuffle=False,
			
 
				+                 drop_last=False,
			
 
				+                 num_classes=1,
			
 
				+                 **kwargs):
			
 
				+        super(TestMOTReader, self).__init__(
			
 
				+            sample_transforms, batch_transforms, batch_size, shuffle,
			
 
				+            drop_last, num_classes, **kwargs)
			
--- a/paddlers/models/ppdet/data/shm_utils.py
+++ b/paddlers/models/ppdet/data/shm_utils.py
@@ -0,0 +1,67 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+
			
 
				+SIZE_UNIT = ['K', 'M', 'G', 'T']
			
 
				+SHM_QUERY_CMD = 'df -h'
			
 
				+SHM_KEY = 'shm'
			
 
				+SHM_DEFAULT_MOUNT = '/dev/shm'
			
 
				+
			
 
				+# [ shared memory size check ]
			
 
				+# In detection models, image/target data occupies a lot of memory, and
			
 
				+# will occupy lots of shared memory in multi-process DataLoader, we use
			
 
				+# following code to get shared memory size and perform a size check to
			
 
				+# disable shared memory use if shared memory size is not enough.
			
 
				+# Shared memory getting process as follows:
			
 
				+# 1. use `df -h` get all mount info
			
 
				+# 2. pick up spaces whose mount info contains 'shm'
			
 
				+# 3. if 'shm' space number is only 1, return its size
			
 
				+# 4. if there are multiple 'shm' spaces, try to find the default mount
			
 
				+#    directory '/dev/shm' used on Linux-like systems, otherwise return
			
 
				+#    the biggest space size.
			
 
				+
			
 
				+
			
 
				+def _parse_size_in_M(size_str):
			
 
				+    num, unit = size_str[:-1], size_str[-1]
			
 
				+    assert unit in SIZE_UNIT, \
			
 
				+            "unknown shm size unit {}".format(unit)
			
 
				+    return float(num) * \
			
 
				+            (1024 ** (SIZE_UNIT.index(unit) - 1))
			
 
				+
			
 
				+
			
 
				+def _get_shared_memory_size_in_M():
			
 
				+    try:
			
 
				+        df_infos = os.popen(SHM_QUERY_CMD).readlines()
			
 
				+    except:
			
 
				+        return None
			
 
				+    else:
			
 
				+        shm_infos = []
			
 
				+        for df_info in df_infos:
			
 
				+            info = df_info.strip()
			
 
				+            if info.find(SHM_KEY) >= 0:
			
 
				+                shm_infos.append(info.split())
			
 
				+
			
 
				+        if len(shm_infos) == 0:
			
 
				+            return None
			
 
				+        elif len(shm_infos) == 1:
			
 
				+            return _parse_size_in_M(shm_infos[0][3])
			
 
				+        else:
			
 
				+            default_mount_infos = [
			
 
				+                si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
			
 
				+            ]
			
 
				+            if default_mount_infos:
			
 
				+                return _parse_size_in_M(default_mount_infos[0][3])
			
 
				+            else:
			
 
				+                return max([_parse_size_in_M(si[3]) for si in shm_infos])
			
--- a/paddlers/models/ppdet/data/source/__init__.py
+++ b/paddlers/models/ppdet/data/source/__init__.py
@@ -0,0 +1,29 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import coco
			
 
				+from . import voc
			
 
				+from . import widerface
			
 
				+from . import category
			
 
				+from . import keypoint_coco
			
 
				+from . import mot
			
 
				+from . import sniper_coco
			
 
				+
			
 
				+from .coco import *
			
 
				+from .voc import *
			
 
				+from .widerface import *
			
 
				+from .category import *
			
 
				+from .keypoint_coco import *
			
 
				+from .mot import *
			
 
				+from .sniper_coco import SniperCOCODataSet
			
--- a/paddlers/models/ppdet/data/source/category.py
+++ b/paddlers/models/ppdet/data/source/category.py
@@ -0,0 +1,904 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+
			
 
				+from paddlers.models.ppdet.data.source.voc import pascalvoc_label
			
 
				+from paddlers.models.ppdet.data.source.widerface import widerface_label
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = ['get_categories']
			
 
				+
			
 
				+
			
 
				+def get_categories(metric_type, anno_file=None, arch=None):
			
 
				+    """
			
 
				+    Get class id to category id map and category id
			
 
				+    to category name map from annotation file.
			
 
				+
			
 
				+    Args:
			
 
				+        metric_type (str): metric type, currently support 'coco', 'voc', 'oid'
			
 
				+            and 'widerface'.
			
 
				+        anno_file (str): annotation file path
			
 
				+    """
			
 
				+    if arch == 'keypoint_arch':
			
 
				+        return (None, {'id': 'keypoint'})
			
 
				+
			
 
				+    if metric_type.lower() == 'coco' or metric_type.lower(
			
 
				+    ) == 'rbox' or metric_type.lower() == 'snipercoco':
			
 
				+        if anno_file and os.path.isfile(anno_file):
			
 
				+            # lazy import pycocotools here
			
 
				+            from pycocotools.coco import COCO
			
 
				+
			
 
				+            coco = COCO(anno_file)
			
 
				+            cats = coco.loadCats(coco.getCatIds())
			
 
				+
			
 
				+            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
			
 
				+            catid2name = {cat['id']: cat['name'] for cat in cats}
			
 
				+            return clsid2catid, catid2name
			
 
				+
			
 
				+        # anno file not exist, load default categories of COCO17
			
 
				+        else:
			
 
				+            if metric_type.lower() == 'rbox':
			
 
				+                return _dota_category()
			
 
				+
			
 
				+            return _coco17_category()
			
 
				+
			
 
				+    elif metric_type.lower() == 'voc':
			
 
				+        if anno_file and os.path.isfile(anno_file):
			
 
				+            cats = []
			
 
				+            with open(anno_file) as f:
			
 
				+                for line in f.readlines():
			
 
				+                    cats.append(line.strip())
			
 
				+
			
 
				+            if cats[0] == 'background':
			
 
				+                cats = cats[1:]
			
 
				+
			
 
				+            clsid2catid = {i: i for i in range(len(cats))}
			
 
				+            catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+
			
 
				+            return clsid2catid, catid2name
			
 
				+
			
 
				+        # anno file not exist, load default categories of
			
 
				+        # VOC all 20 categories
			
 
				+        else:
			
 
				+            return _vocall_category()
			
 
				+
			
 
				+    elif metric_type.lower() == 'oid':
			
 
				+        if anno_file and os.path.isfile(anno_file):
			
 
				+            logger.warning("only default categories support for OID19")
			
 
				+        return _oid19_category()
			
 
				+
			
 
				+    elif metric_type.lower() == 'widerface':
			
 
				+        return _widerface_category()
			
 
				+
			
 
				+    elif metric_type.lower() == 'keypointtopdowncocoeval' or metric_type.lower(
			
 
				+    ) == 'keypointtopdownmpiieval':
			
 
				+        return (None, {'id': 'keypoint'})
			
 
				+
			
 
				+    elif metric_type.lower() in ['mot', 'motdet', 'reid']:
			
 
				+        if anno_file and os.path.isfile(anno_file):
			
 
				+            cats = []
			
 
				+            with open(anno_file) as f:
			
 
				+                for line in f.readlines():
			
 
				+                    cats.append(line.strip())
			
 
				+            if cats[0] == 'background':
			
 
				+                cats = cats[1:]
			
 
				+            clsid2catid = {i: i for i in range(len(cats))}
			
 
				+            catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+            return clsid2catid, catid2name
			
 
				+        # anno file not exist, load default category 'pedestrian'.
			
 
				+        else:
			
 
				+            return _mot_category(category='pedestrian')
			
 
				+
			
 
				+    elif metric_type.lower() in ['kitti', 'bdd100kmot']:
			
 
				+        return _mot_category(category='vehicle')
			
 
				+
			
 
				+    elif metric_type.lower() in ['mcmot']:
			
 
				+        if anno_file and os.path.isfile(anno_file):
			
 
				+            cats = []
			
 
				+            with open(anno_file) as f:
			
 
				+                for line in f.readlines():
			
 
				+                    cats.append(line.strip())
			
 
				+            if cats[0] == 'background':
			
 
				+                cats = cats[1:]
			
 
				+            clsid2catid = {i: i for i in range(len(cats))}
			
 
				+            catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+            return clsid2catid, catid2name
			
 
				+        # anno file not exist, load default categories of visdrone all 10 categories
			
 
				+        else:
			
 
				+            return _visdrone_category()
			
 
				+
			
 
				+    else:
			
 
				+        raise ValueError("unknown metric type {}".format(metric_type))
			
 
				+
			
 
				+
			
 
				+def _mot_category(category='pedestrian'):
			
 
				+    """
			
 
				+    Get class id to category id map and category id
			
 
				+    to category name map of mot dataset
			
 
				+    """
			
 
				+    label_map = {category: 0}
			
 
				+    label_map = sorted(label_map.items(), key=lambda x: x[1])
			
 
				+    cats = [l[0] for l in label_map]
			
 
				+
			
 
				+    clsid2catid = {i: i for i in range(len(cats))}
			
 
				+    catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _coco17_category():
			
 
				+    """
			
 
				+    Get class id to category id map and category id
			
 
				+    to category name map of COCO2017 dataset
			
 
				+
			
 
				+    """
			
 
				+    clsid2catid = {
			
 
				+        1: 1,
			
 
				+        2: 2,
			
 
				+        3: 3,
			
 
				+        4: 4,
			
 
				+        5: 5,
			
 
				+        6: 6,
			
 
				+        7: 7,
			
 
				+        8: 8,
			
 
				+        9: 9,
			
 
				+        10: 10,
			
 
				+        11: 11,
			
 
				+        12: 13,
			
 
				+        13: 14,
			
 
				+        14: 15,
			
 
				+        15: 16,
			
 
				+        16: 17,
			
 
				+        17: 18,
			
 
				+        18: 19,
			
 
				+        19: 20,
			
 
				+        20: 21,
			
 
				+        21: 22,
			
 
				+        22: 23,
			
 
				+        23: 24,
			
 
				+        24: 25,
			
 
				+        25: 27,
			
 
				+        26: 28,
			
 
				+        27: 31,
			
 
				+        28: 32,
			
 
				+        29: 33,
			
 
				+        30: 34,
			
 
				+        31: 35,
			
 
				+        32: 36,
			
 
				+        33: 37,
			
 
				+        34: 38,
			
 
				+        35: 39,
			
 
				+        36: 40,
			
 
				+        37: 41,
			
 
				+        38: 42,
			
 
				+        39: 43,
			
 
				+        40: 44,
			
 
				+        41: 46,
			
 
				+        42: 47,
			
 
				+        43: 48,
			
 
				+        44: 49,
			
 
				+        45: 50,
			
 
				+        46: 51,
			
 
				+        47: 52,
			
 
				+        48: 53,
			
 
				+        49: 54,
			
 
				+        50: 55,
			
 
				+        51: 56,
			
 
				+        52: 57,
			
 
				+        53: 58,
			
 
				+        54: 59,
			
 
				+        55: 60,
			
 
				+        56: 61,
			
 
				+        57: 62,
			
 
				+        58: 63,
			
 
				+        59: 64,
			
 
				+        60: 65,
			
 
				+        61: 67,
			
 
				+        62: 70,
			
 
				+        63: 72,
			
 
				+        64: 73,
			
 
				+        65: 74,
			
 
				+        66: 75,
			
 
				+        67: 76,
			
 
				+        68: 77,
			
 
				+        69: 78,
			
 
				+        70: 79,
			
 
				+        71: 80,
			
 
				+        72: 81,
			
 
				+        73: 82,
			
 
				+        74: 84,
			
 
				+        75: 85,
			
 
				+        76: 86,
			
 
				+        77: 87,
			
 
				+        78: 88,
			
 
				+        79: 89,
			
 
				+        80: 90
			
 
				+    }
			
 
				+
			
 
				+    catid2name = {
			
 
				+        0: 'background',
			
 
				+        1: 'person',
			
 
				+        2: 'bicycle',
			
 
				+        3: 'car',
			
 
				+        4: 'motorcycle',
			
 
				+        5: 'airplane',
			
 
				+        6: 'bus',
			
 
				+        7: 'train',
			
 
				+        8: 'truck',
			
 
				+        9: 'boat',
			
 
				+        10: 'traffic light',
			
 
				+        11: 'fire hydrant',
			
 
				+        13: 'stop sign',
			
 
				+        14: 'parking meter',
			
 
				+        15: 'bench',
			
 
				+        16: 'bird',
			
 
				+        17: 'cat',
			
 
				+        18: 'dog',
			
 
				+        19: 'horse',
			
 
				+        20: 'sheep',
			
 
				+        21: 'cow',
			
 
				+        22: 'elephant',
			
 
				+        23: 'bear',
			
 
				+        24: 'zebra',
			
 
				+        25: 'giraffe',
			
 
				+        27: 'backpack',
			
 
				+        28: 'umbrella',
			
 
				+        31: 'handbag',
			
 
				+        32: 'tie',
			
 
				+        33: 'suitcase',
			
 
				+        34: 'frisbee',
			
 
				+        35: 'skis',
			
 
				+        36: 'snowboard',
			
 
				+        37: 'sports ball',
			
 
				+        38: 'kite',
			
 
				+        39: 'baseball bat',
			
 
				+        40: 'baseball glove',
			
 
				+        41: 'skateboard',
			
 
				+        42: 'surfboard',
			
 
				+        43: 'tennis racket',
			
 
				+        44: 'bottle',
			
 
				+        46: 'wine glass',
			
 
				+        47: 'cup',
			
 
				+        48: 'fork',
			
 
				+        49: 'knife',
			
 
				+        50: 'spoon',
			
 
				+        51: 'bowl',
			
 
				+        52: 'banana',
			
 
				+        53: 'apple',
			
 
				+        54: 'sandwich',
			
 
				+        55: 'orange',
			
 
				+        56: 'broccoli',
			
 
				+        57: 'carrot',
			
 
				+        58: 'hot dog',
			
 
				+        59: 'pizza',
			
 
				+        60: 'donut',
			
 
				+        61: 'cake',
			
 
				+        62: 'chair',
			
 
				+        63: 'couch',
			
 
				+        64: 'potted plant',
			
 
				+        65: 'bed',
			
 
				+        67: 'dining table',
			
 
				+        70: 'toilet',
			
 
				+        72: 'tv',
			
 
				+        73: 'laptop',
			
 
				+        74: 'mouse',
			
 
				+        75: 'remote',
			
 
				+        76: 'keyboard',
			
 
				+        77: 'cell phone',
			
 
				+        78: 'microwave',
			
 
				+        79: 'oven',
			
 
				+        80: 'toaster',
			
 
				+        81: 'sink',
			
 
				+        82: 'refrigerator',
			
 
				+        84: 'book',
			
 
				+        85: 'clock',
			
 
				+        86: 'vase',
			
 
				+        87: 'scissors',
			
 
				+        88: 'teddy bear',
			
 
				+        89: 'hair drier',
			
 
				+        90: 'toothbrush'
			
 
				+    }
			
 
				+
			
 
				+    clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
			
 
				+    catid2name.pop(0)
			
 
				+
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _dota_category():
			
 
				+    """
			
 
				+    Get class id to category id map and category id
			
 
				+    to category name map of dota dataset
			
 
				+    """
			
 
				+    catid2name = {
			
 
				+        0: 'background',
			
 
				+        1: 'plane',
			
 
				+        2: 'baseball-diamond',
			
 
				+        3: 'bridge',
			
 
				+        4: 'ground-track-field',
			
 
				+        5: 'small-vehicle',
			
 
				+        6: 'large-vehicle',
			
 
				+        7: 'ship',
			
 
				+        8: 'tennis-court',
			
 
				+        9: 'basketball-court',
			
 
				+        10: 'storage-tank',
			
 
				+        11: 'soccer-ball-field',
			
 
				+        12: 'roundabout',
			
 
				+        13: 'harbor',
			
 
				+        14: 'swimming-pool',
			
 
				+        15: 'helicopter'
			
 
				+    }
			
 
				+    catid2name.pop(0)
			
 
				+    clsid2catid = {i: i + 1 for i in range(len(catid2name))}
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _vocall_category():
			
 
				+    """
			
 
				+    Get class id to category id map and category id
			
 
				+    to category name map of mixup voc dataset
			
 
				+
			
 
				+    """
			
 
				+    label_map = pascalvoc_label()
			
 
				+    label_map = sorted(label_map.items(), key=lambda x: x[1])
			
 
				+    cats = [l[0] for l in label_map]
			
 
				+
			
 
				+    clsid2catid = {i: i for i in range(len(cats))}
			
 
				+    catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _widerface_category():
			
 
				+    label_map = widerface_label()
			
 
				+    label_map = sorted(label_map.items(), key=lambda x: x[1])
			
 
				+    cats = [l[0] for l in label_map]
			
 
				+    clsid2catid = {i: i for i in range(len(cats))}
			
 
				+    catid2name = {i: name for i, name in enumerate(cats)}
			
 
				+
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _oid19_category():
			
 
				+    clsid2catid = {k: k + 1 for k in range(500)}
			
 
				+
			
 
				+    catid2name = {
			
 
				+        0: "background",
			
 
				+        1: "Infant bed",
			
 
				+        2: "Rose",
			
 
				+        3: "Flag",
			
 
				+        4: "Flashlight",
			
 
				+        5: "Sea turtle",
			
 
				+        6: "Camera",
			
 
				+        7: "Animal",
			
 
				+        8: "Glove",
			
 
				+        9: "Crocodile",
			
 
				+        10: "Cattle",
			
 
				+        11: "House",
			
 
				+        12: "Guacamole",
			
 
				+        13: "Penguin",
			
 
				+        14: "Vehicle registration plate",
			
 
				+        15: "Bench",
			
 
				+        16: "Ladybug",
			
 
				+        17: "Human nose",
			
 
				+        18: "Watermelon",
			
 
				+        19: "Flute",
			
 
				+        20: "Butterfly",
			
 
				+        21: "Washing machine",
			
 
				+        22: "Raccoon",
			
 
				+        23: "Segway",
			
 
				+        24: "Taco",
			
 
				+        25: "Jellyfish",
			
 
				+        26: "Cake",
			
 
				+        27: "Pen",
			
 
				+        28: "Cannon",
			
 
				+        29: "Bread",
			
 
				+        30: "Tree",
			
 
				+        31: "Shellfish",
			
 
				+        32: "Bed",
			
 
				+        33: "Hamster",
			
 
				+        34: "Hat",
			
 
				+        35: "Toaster",
			
 
				+        36: "Sombrero",
			
 
				+        37: "Tiara",
			
 
				+        38: "Bowl",
			
 
				+        39: "Dragonfly",
			
 
				+        40: "Moths and butterflies",
			
 
				+        41: "Antelope",
			
 
				+        42: "Vegetable",
			
 
				+        43: "Torch",
			
 
				+        44: "Building",
			
 
				+        45: "Power plugs and sockets",
			
 
				+        46: "Blender",
			
 
				+        47: "Billiard table",
			
 
				+        48: "Cutting board",
			
 
				+        49: "Bronze sculpture",
			
 
				+        50: "Turtle",
			
 
				+        51: "Broccoli",
			
 
				+        52: "Tiger",
			
 
				+        53: "Mirror",
			
 
				+        54: "Bear",
			
 
				+        55: "Zucchini",
			
 
				+        56: "Dress",
			
 
				+        57: "Volleyball",
			
 
				+        58: "Guitar",
			
 
				+        59: "Reptile",
			
 
				+        60: "Golf cart",
			
 
				+        61: "Tart",
			
 
				+        62: "Fedora",
			
 
				+        63: "Carnivore",
			
 
				+        64: "Car",
			
 
				+        65: "Lighthouse",
			
 
				+        66: "Coffeemaker",
			
 
				+        67: "Food processor",
			
 
				+        68: "Truck",
			
 
				+        69: "Bookcase",
			
 
				+        70: "Surfboard",
			
 
				+        71: "Footwear",
			
 
				+        72: "Bench",
			
 
				+        73: "Necklace",
			
 
				+        74: "Flower",
			
 
				+        75: "Radish",
			
 
				+        76: "Marine mammal",
			
 
				+        77: "Frying pan",
			
 
				+        78: "Tap",
			
 
				+        79: "Peach",
			
 
				+        80: "Knife",
			
 
				+        81: "Handbag",
			
 
				+        82: "Laptop",
			
 
				+        83: "Tent",
			
 
				+        84: "Ambulance",
			
 
				+        85: "Christmas tree",
			
 
				+        86: "Eagle",
			
 
				+        87: "Limousine",
			
 
				+        88: "Kitchen & dining room table",
			
 
				+        89: "Polar bear",
			
 
				+        90: "Tower",
			
 
				+        91: "Football",
			
 
				+        92: "Willow",
			
 
				+        93: "Human head",
			
 
				+        94: "Stop sign",
			
 
				+        95: "Banana",
			
 
				+        96: "Mixer",
			
 
				+        97: "Binoculars",
			
 
				+        98: "Dessert",
			
 
				+        99: "Bee",
			
 
				+        100: "Chair",
			
 
				+        101: "Wood-burning stove",
			
 
				+        102: "Flowerpot",
			
 
				+        103: "Beaker",
			
 
				+        104: "Oyster",
			
 
				+        105: "Woodpecker",
			
 
				+        106: "Harp",
			
 
				+        107: "Bathtub",
			
 
				+        108: "Wall clock",
			
 
				+        109: "Sports uniform",
			
 
				+        110: "Rhinoceros",
			
 
				+        111: "Beehive",
			
 
				+        112: "Cupboard",
			
 
				+        113: "Chicken",
			
 
				+        114: "Man",
			
 
				+        115: "Blue jay",
			
 
				+        116: "Cucumber",
			
 
				+        117: "Balloon",
			
 
				+        118: "Kite",
			
 
				+        119: "Fireplace",
			
 
				+        120: "Lantern",
			
 
				+        121: "Missile",
			
 
				+        122: "Book",
			
 
				+        123: "Spoon",
			
 
				+        124: "Grapefruit",
			
 
				+        125: "Squirrel",
			
 
				+        126: "Orange",
			
 
				+        127: "Coat",
			
 
				+        128: "Punching bag",
			
 
				+        129: "Zebra",
			
 
				+        130: "Billboard",
			
 
				+        131: "Bicycle",
			
 
				+        132: "Door handle",
			
 
				+        133: "Mechanical fan",
			
 
				+        134: "Ring binder",
			
 
				+        135: "Table",
			
 
				+        136: "Parrot",
			
 
				+        137: "Sock",
			
 
				+        138: "Vase",
			
 
				+        139: "Weapon",
			
 
				+        140: "Shotgun",
			
 
				+        141: "Glasses",
			
 
				+        142: "Seahorse",
			
 
				+        143: "Belt",
			
 
				+        144: "Watercraft",
			
 
				+        145: "Window",
			
 
				+        146: "Giraffe",
			
 
				+        147: "Lion",
			
 
				+        148: "Tire",
			
 
				+        149: "Vehicle",
			
 
				+        150: "Canoe",
			
 
				+        151: "Tie",
			
 
				+        152: "Shelf",
			
 
				+        153: "Picture frame",
			
 
				+        154: "Printer",
			
 
				+        155: "Human leg",
			
 
				+        156: "Boat",
			
 
				+        157: "Slow cooker",
			
 
				+        158: "Croissant",
			
 
				+        159: "Candle",
			
 
				+        160: "Pancake",
			
 
				+        161: "Pillow",
			
 
				+        162: "Coin",
			
 
				+        163: "Stretcher",
			
 
				+        164: "Sandal",
			
 
				+        165: "Woman",
			
 
				+        166: "Stairs",
			
 
				+        167: "Harpsichord",
			
 
				+        168: "Stool",
			
 
				+        169: "Bus",
			
 
				+        170: "Suitcase",
			
 
				+        171: "Human mouth",
			
 
				+        172: "Juice",
			
 
				+        173: "Skull",
			
 
				+        174: "Door",
			
 
				+        175: "Violin",
			
 
				+        176: "Chopsticks",
			
 
				+        177: "Digital clock",
			
 
				+        178: "Sunflower",
			
 
				+        179: "Leopard",
			
 
				+        180: "Bell pepper",
			
 
				+        181: "Harbor seal",
			
 
				+        182: "Snake",
			
 
				+        183: "Sewing machine",
			
 
				+        184: "Goose",
			
 
				+        185: "Helicopter",
			
 
				+        186: "Seat belt",
			
 
				+        187: "Coffee cup",
			
 
				+        188: "Microwave oven",
			
 
				+        189: "Hot dog",
			
 
				+        190: "Countertop",
			
 
				+        191: "Serving tray",
			
 
				+        192: "Dog bed",
			
 
				+        193: "Beer",
			
 
				+        194: "Sunglasses",
			
 
				+        195: "Golf ball",
			
 
				+        196: "Waffle",
			
 
				+        197: "Palm tree",
			
 
				+        198: "Trumpet",
			
 
				+        199: "Ruler",
			
 
				+        200: "Helmet",
			
 
				+        201: "Ladder",
			
 
				+        202: "Office building",
			
 
				+        203: "Tablet computer",
			
 
				+        204: "Toilet paper",
			
 
				+        205: "Pomegranate",
			
 
				+        206: "Skirt",
			
 
				+        207: "Gas stove",
			
 
				+        208: "Cookie",
			
 
				+        209: "Cart",
			
 
				+        210: "Raven",
			
 
				+        211: "Egg",
			
 
				+        212: "Burrito",
			
 
				+        213: "Goat",
			
 
				+        214: "Kitchen knife",
			
 
				+        215: "Skateboard",
			
 
				+        216: "Salt and pepper shakers",
			
 
				+        217: "Lynx",
			
 
				+        218: "Boot",
			
 
				+        219: "Platter",
			
 
				+        220: "Ski",
			
 
				+        221: "Swimwear",
			
 
				+        222: "Swimming pool",
			
 
				+        223: "Drinking straw",
			
 
				+        224: "Wrench",
			
 
				+        225: "Drum",
			
 
				+        226: "Ant",
			
 
				+        227: "Human ear",
			
 
				+        228: "Headphones",
			
 
				+        229: "Fountain",
			
 
				+        230: "Bird",
			
 
				+        231: "Jeans",
			
 
				+        232: "Television",
			
 
				+        233: "Crab",
			
 
				+        234: "Microphone",
			
 
				+        235: "Home appliance",
			
 
				+        236: "Snowplow",
			
 
				+        237: "Beetle",
			
 
				+        238: "Artichoke",
			
 
				+        239: "Jet ski",
			
 
				+        240: "Stationary bicycle",
			
 
				+        241: "Human hair",
			
 
				+        242: "Brown bear",
			
 
				+        243: "Starfish",
			
 
				+        244: "Fork",
			
 
				+        245: "Lobster",
			
 
				+        246: "Corded phone",
			
 
				+        247: "Drink",
			
 
				+        248: "Saucer",
			
 
				+        249: "Carrot",
			
 
				+        250: "Insect",
			
 
				+        251: "Clock",
			
 
				+        252: "Castle",
			
 
				+        253: "Tennis racket",
			
 
				+        254: "Ceiling fan",
			
 
				+        255: "Asparagus",
			
 
				+        256: "Jaguar",
			
 
				+        257: "Musical instrument",
			
 
				+        258: "Train",
			
 
				+        259: "Cat",
			
 
				+        260: "Rifle",
			
 
				+        261: "Dumbbell",
			
 
				+        262: "Mobile phone",
			
 
				+        263: "Taxi",
			
 
				+        264: "Shower",
			
 
				+        265: "Pitcher",
			
 
				+        266: "Lemon",
			
 
				+        267: "Invertebrate",
			
 
				+        268: "Turkey",
			
 
				+        269: "High heels",
			
 
				+        270: "Bust",
			
 
				+        271: "Elephant",
			
 
				+        272: "Scarf",
			
 
				+        273: "Barrel",
			
 
				+        274: "Trombone",
			
 
				+        275: "Pumpkin",
			
 
				+        276: "Box",
			
 
				+        277: "Tomato",
			
 
				+        278: "Frog",
			
 
				+        279: "Bidet",
			
 
				+        280: "Human face",
			
 
				+        281: "Houseplant",
			
 
				+        282: "Van",
			
 
				+        283: "Shark",
			
 
				+        284: "Ice cream",
			
 
				+        285: "Swim cap",
			
 
				+        286: "Falcon",
			
 
				+        287: "Ostrich",
			
 
				+        288: "Handgun",
			
 
				+        289: "Whiteboard",
			
 
				+        290: "Lizard",
			
 
				+        291: "Pasta",
			
 
				+        292: "Snowmobile",
			
 
				+        293: "Light bulb",
			
 
				+        294: "Window blind",
			
 
				+        295: "Muffin",
			
 
				+        296: "Pretzel",
			
 
				+        297: "Computer monitor",
			
 
				+        298: "Horn",
			
 
				+        299: "Furniture",
			
 
				+        300: "Sandwich",
			
 
				+        301: "Fox",
			
 
				+        302: "Convenience store",
			
 
				+        303: "Fish",
			
 
				+        304: "Fruit",
			
 
				+        305: "Earrings",
			
 
				+        306: "Curtain",
			
 
				+        307: "Grape",
			
 
				+        308: "Sofa bed",
			
 
				+        309: "Horse",
			
 
				+        310: "Luggage and bags",
			
 
				+        311: "Desk",
			
 
				+        312: "Crutch",
			
 
				+        313: "Bicycle helmet",
			
 
				+        314: "Tick",
			
 
				+        315: "Airplane",
			
 
				+        316: "Canary",
			
 
				+        317: "Spatula",
			
 
				+        318: "Watch",
			
 
				+        319: "Lily",
			
 
				+        320: "Kitchen appliance",
			
 
				+        321: "Filing cabinet",
			
 
				+        322: "Aircraft",
			
 
				+        323: "Cake stand",
			
 
				+        324: "Candy",
			
 
				+        325: "Sink",
			
 
				+        326: "Mouse",
			
 
				+        327: "Wine",
			
 
				+        328: "Wheelchair",
			
 
				+        329: "Goldfish",
			
 
				+        330: "Refrigerator",
			
 
				+        331: "French fries",
			
 
				+        332: "Drawer",
			
 
				+        333: "Treadmill",
			
 
				+        334: "Picnic basket",
			
 
				+        335: "Dice",
			
 
				+        336: "Cabbage",
			
 
				+        337: "Football helmet",
			
 
				+        338: "Pig",
			
 
				+        339: "Person",
			
 
				+        340: "Shorts",
			
 
				+        341: "Gondola",
			
 
				+        342: "Honeycomb",
			
 
				+        343: "Doughnut",
			
 
				+        344: "Chest of drawers",
			
 
				+        345: "Land vehicle",
			
 
				+        346: "Bat",
			
 
				+        347: "Monkey",
			
 
				+        348: "Dagger",
			
 
				+        349: "Tableware",
			
 
				+        350: "Human foot",
			
 
				+        351: "Mug",
			
 
				+        352: "Alarm clock",
			
 
				+        353: "Pressure cooker",
			
 
				+        354: "Human hand",
			
 
				+        355: "Tortoise",
			
 
				+        356: "Baseball glove",
			
 
				+        357: "Sword",
			
 
				+        358: "Pear",
			
 
				+        359: "Miniskirt",
			
 
				+        360: "Traffic sign",
			
 
				+        361: "Girl",
			
 
				+        362: "Roller skates",
			
 
				+        363: "Dinosaur",
			
 
				+        364: "Porch",
			
 
				+        365: "Human beard",
			
 
				+        366: "Submarine sandwich",
			
 
				+        367: "Screwdriver",
			
 
				+        368: "Strawberry",
			
 
				+        369: "Wine glass",
			
 
				+        370: "Seafood",
			
 
				+        371: "Racket",
			
 
				+        372: "Wheel",
			
 
				+        373: "Sea lion",
			
 
				+        374: "Toy",
			
 
				+        375: "Tea",
			
 
				+        376: "Tennis ball",
			
 
				+        377: "Waste container",
			
 
				+        378: "Mule",
			
 
				+        379: "Cricket ball",
			
 
				+        380: "Pineapple",
			
 
				+        381: "Coconut",
			
 
				+        382: "Doll",
			
 
				+        383: "Coffee table",
			
 
				+        384: "Snowman",
			
 
				+        385: "Lavender",
			
 
				+        386: "Shrimp",
			
 
				+        387: "Maple",
			
 
				+        388: "Cowboy hat",
			
 
				+        389: "Goggles",
			
 
				+        390: "Rugby ball",
			
 
				+        391: "Caterpillar",
			
 
				+        392: "Poster",
			
 
				+        393: "Rocket",
			
 
				+        394: "Organ",
			
 
				+        395: "Saxophone",
			
 
				+        396: "Traffic light",
			
 
				+        397: "Cocktail",
			
 
				+        398: "Plastic bag",
			
 
				+        399: "Squash",
			
 
				+        400: "Mushroom",
			
 
				+        401: "Hamburger",
			
 
				+        402: "Light switch",
			
 
				+        403: "Parachute",
			
 
				+        404: "Teddy bear",
			
 
				+        405: "Winter melon",
			
 
				+        406: "Deer",
			
 
				+        407: "Musical keyboard",
			
 
				+        408: "Plumbing fixture",
			
 
				+        409: "Scoreboard",
			
 
				+        410: "Baseball bat",
			
 
				+        411: "Envelope",
			
 
				+        412: "Adhesive tape",
			
 
				+        413: "Briefcase",
			
 
				+        414: "Paddle",
			
 
				+        415: "Bow and arrow",
			
 
				+        416: "Telephone",
			
 
				+        417: "Sheep",
			
 
				+        418: "Jacket",
			
 
				+        419: "Boy",
			
 
				+        420: "Pizza",
			
 
				+        421: "Otter",
			
 
				+        422: "Office supplies",
			
 
				+        423: "Couch",
			
 
				+        424: "Cello",
			
 
				+        425: "Bull",
			
 
				+        426: "Camel",
			
 
				+        427: "Ball",
			
 
				+        428: "Duck",
			
 
				+        429: "Whale",
			
 
				+        430: "Shirt",
			
 
				+        431: "Tank",
			
 
				+        432: "Motorcycle",
			
 
				+        433: "Accordion",
			
 
				+        434: "Owl",
			
 
				+        435: "Porcupine",
			
 
				+        436: "Sun hat",
			
 
				+        437: "Nail",
			
 
				+        438: "Scissors",
			
 
				+        439: "Swan",
			
 
				+        440: "Lamp",
			
 
				+        441: "Crown",
			
 
				+        442: "Piano",
			
 
				+        443: "Sculpture",
			
 
				+        444: "Cheetah",
			
 
				+        445: "Oboe",
			
 
				+        446: "Tin can",
			
 
				+        447: "Mango",
			
 
				+        448: "Tripod",
			
 
				+        449: "Oven",
			
 
				+        450: "Mouse",
			
 
				+        451: "Barge",
			
 
				+        452: "Coffee",
			
 
				+        453: "Snowboard",
			
 
				+        454: "Common fig",
			
 
				+        455: "Salad",
			
 
				+        456: "Marine invertebrates",
			
 
				+        457: "Umbrella",
			
 
				+        458: "Kangaroo",
			
 
				+        459: "Human arm",
			
 
				+        460: "Measuring cup",
			
 
				+        461: "Snail",
			
 
				+        462: "Loveseat",
			
 
				+        463: "Suit",
			
 
				+        464: "Teapot",
			
 
				+        465: "Bottle",
			
 
				+        466: "Alpaca",
			
 
				+        467: "Kettle",
			
 
				+        468: "Trousers",
			
 
				+        469: "Popcorn",
			
 
				+        470: "Centipede",
			
 
				+        471: "Spider",
			
 
				+        472: "Sparrow",
			
 
				+        473: "Plate",
			
 
				+        474: "Bagel",
			
 
				+        475: "Personal care",
			
 
				+        476: "Apple",
			
 
				+        477: "Brassiere",
			
 
				+        478: "Bathroom cabinet",
			
 
				+        479: "studio couch",
			
 
				+        480: "Computer keyboard",
			
 
				+        481: "Table tennis racket",
			
 
				+        482: "Sushi",
			
 
				+        483: "Cabinetry",
			
 
				+        484: "Street light",
			
 
				+        485: "Towel",
			
 
				+        486: "Nightstand",
			
 
				+        487: "Rabbit",
			
 
				+        488: "Dolphin",
			
 
				+        489: "Dog",
			
 
				+        490: "Jug",
			
 
				+        491: "Wok",
			
 
				+        492: "Fire hydrant",
			
 
				+        493: "Human eye",
			
 
				+        494: "Skyscraper",
			
 
				+        495: "Backpack",
			
 
				+        496: "Potato",
			
 
				+        497: "Paper towel",
			
 
				+        498: "Lifejacket",
			
 
				+        499: "Bicycle wheel",
			
 
				+        500: "Toilet",
			
 
				+    }
			
 
				+
			
 
				+    return clsid2catid, catid2name
			
 
				+
			
 
				+
			
 
				+def _visdrone_category():
			
 
				+    clsid2catid = {i: i for i in range(10)}
			
 
				+
			
 
				+    catid2name = {
			
 
				+        0: 'pedestrian',
			
 
				+        1: 'people',
			
 
				+        2: 'bicycle',
			
 
				+        3: 'car',
			
 
				+        4: 'van',
			
 
				+        5: 'truck',
			
 
				+        6: 'tricycle',
			
 
				+        7: 'awning-tricycle',
			
 
				+        8: 'bus',
			
 
				+        9: 'motor'
			
 
				+    }
			
 
				+    return clsid2catid, catid2name
			
--- a/paddlers/models/ppdet/data/source/coco.py
+++ b/paddlers/models/ppdet/data/source/coco.py
@@ -0,0 +1,251 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import numpy as np
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from .dataset import DetDataset
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class COCODataSet(DetDataset):
			
 
				+    """
			
 
				+    Load dataset with COCO format.
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_dir (str): directory for images.
			
 
				+        anno_path (str): coco annotation file path.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+        load_crowd (bool): whether to load crowded ground-truth.
			
 
				+            False as default
			
 
				+        allow_empty (bool): whether to load empty entry. False as default
			
 
				+        empty_ratio (float): the ratio of empty record number to total
			
 
				+            record's, if empty_ratio is out of [0. ,1.), do not sample the
			
 
				+            records and use all the empty entries. 1. as default
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1,
			
 
				+                 load_crowd=False,
			
 
				+                 allow_empty=False,
			
 
				+                 empty_ratio=1.):
			
 
				+        super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
			
 
				+                                          data_fields, sample_num)
			
 
				+        self.load_image_only = False
			
 
				+        self.load_semantic = False
			
 
				+        self.load_crowd = load_crowd
			
 
				+        self.allow_empty = allow_empty
			
 
				+        self.empty_ratio = empty_ratio
			
 
				+
			
 
				+    def _sample_empty(self, records, num):
			
 
				+        # if empty_ratio is out of [0. ,1.), do not sample the records
			
 
				+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
			
 
				+            return records
			
 
				+        import random
			
 
				+        sample_num = min(
			
 
				+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
			
 
				+        records = random.sample(records, sample_num)
			
 
				+        return records
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
			
 
				+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
			
 
				+
			
 
				+        assert anno_path.endswith('.json'), \
			
 
				+            'invalid coco annotation file: ' + anno_path
			
 
				+        from pycocotools.coco import COCO
			
 
				+        coco = COCO(anno_path)
			
 
				+        img_ids = coco.getImgIds()
			
 
				+        img_ids.sort()
			
 
				+        cat_ids = coco.getCatIds()
			
 
				+        records = []
			
 
				+        empty_records = []
			
 
				+        ct = 0
			
 
				+
			
 
				+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
			
 
				+        self.cname2cid = dict({
			
 
				+            coco.loadCats(catid)[0]['name']: clsid
			
 
				+            for catid, clsid in self.catid2clsid.items()
			
 
				+        })
			
 
				+
			
 
				+        if 'annotations' not in coco.dataset:
			
 
				+            self.load_image_only = True
			
 
				+            logger.warning(
			
 
				+                'Annotation file: {} does not contains ground truth '
			
 
				+                'and load image information only.'.format(anno_path))
			
 
				+
			
 
				+        for img_id in img_ids:
			
 
				+            img_anno = coco.loadImgs([img_id])[0]
			
 
				+            im_fname = img_anno['file_name']
			
 
				+            im_w = float(img_anno['width'])
			
 
				+            im_h = float(img_anno['height'])
			
 
				+
			
 
				+            im_path = os.path.join(image_dir,
			
 
				+                                   im_fname) if image_dir else im_fname
			
 
				+            is_empty = False
			
 
				+            if not os.path.exists(im_path):
			
 
				+                logger.warning('Illegal image file: {}, and it will be '
			
 
				+                               'ignored'.format(im_path))
			
 
				+                continue
			
 
				+
			
 
				+            if im_w < 0 or im_h < 0:
			
 
				+                logger.warning(
			
 
				+                    'Illegal width: {} or height: {} in annotation, '
			
 
				+                    'and im_id: {} will be ignored'.format(im_w, im_h, img_id))
			
 
				+                continue
			
 
				+
			
 
				+            coco_rec = {
			
 
				+                'im_file': im_path,
			
 
				+                'im_id': np.array([img_id]),
			
 
				+                'h': im_h,
			
 
				+                'w': im_w,
			
 
				+            } if 'image' in self.data_fields else {}
			
 
				+
			
 
				+            if not self.load_image_only:
			
 
				+                ins_anno_ids = coco.getAnnIds(
			
 
				+                    imgIds=[img_id],
			
 
				+                    iscrowd=None if self.load_crowd else False)
			
 
				+                instances = coco.loadAnns(ins_anno_ids)
			
 
				+
			
 
				+                bboxes = []
			
 
				+                is_rbox_anno = False
			
 
				+                for inst in instances:
			
 
				+                    # check gt bbox
			
 
				+                    if inst.get('ignore', False):
			
 
				+                        continue
			
 
				+                    if 'bbox' not in inst.keys():
			
 
				+                        continue
			
 
				+                    else:
			
 
				+                        if not any(np.array(inst['bbox'])):
			
 
				+                            continue
			
 
				+
			
 
				+                    # read rbox anno or not
			
 
				+                    is_rbox_anno = True if len(inst['bbox']) == 5 else False
			
 
				+                    if is_rbox_anno:
			
 
				+                        xc, yc, box_w, box_h, angle = inst['bbox']
			
 
				+                        x1 = xc - box_w / 2.0
			
 
				+                        y1 = yc - box_h / 2.0
			
 
				+                        x2 = x1 + box_w
			
 
				+                        y2 = y1 + box_h
			
 
				+                    else:
			
 
				+                        x1, y1, box_w, box_h = inst['bbox']
			
 
				+                        x2 = x1 + box_w
			
 
				+                        y2 = y1 + box_h
			
 
				+                    eps = 1e-5
			
 
				+                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
			
 
				+                        inst['clean_bbox'] = [
			
 
				+                            round(float(x), 3) for x in [x1, y1, x2, y2]
			
 
				+                        ]
			
 
				+                        if is_rbox_anno:
			
 
				+                            inst['clean_rbox'] = [xc, yc, box_w, box_h, angle]
			
 
				+                        bboxes.append(inst)
			
 
				+                    else:
			
 
				+                        logger.warning(
			
 
				+                            'Found an invalid bbox in annotations: im_id: {}, '
			
 
				+                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
			
 
				+                                img_id, float(inst['area']), x1, y1, x2, y2))
			
 
				+
			
 
				+                num_bbox = len(bboxes)
			
 
				+                if num_bbox <= 0 and not self.allow_empty:
			
 
				+                    continue
			
 
				+                elif num_bbox <= 0:
			
 
				+                    is_empty = True
			
 
				+
			
 
				+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
			
 
				+                if is_rbox_anno:
			
 
				+                    gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
			
 
				+                gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				+                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				+                gt_poly = [None] * num_bbox
			
 
				+
			
 
				+                has_segmentation = False
			
 
				+                for i, box in enumerate(bboxes):
			
 
				+                    catid = box['category_id']
			
 
				+                    gt_class[i][0] = self.catid2clsid[catid]
			
 
				+                    gt_bbox[i, :] = box['clean_bbox']
			
 
				+                    # xc, yc, w, h, theta
			
 
				+                    if is_rbox_anno:
			
 
				+                        gt_rbox[i, :] = box['clean_rbox']
			
 
				+                    is_crowd[i][0] = box['iscrowd']
			
 
				+                    # check RLE format
			
 
				+                    if 'segmentation' in box and box['iscrowd'] == 1:
			
 
				+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
			
 
				+                    elif 'segmentation' in box and box['segmentation']:
			
 
				+                        if not np.array(box['segmentation']
			
 
				+                                        ).size > 0 and not self.allow_empty:
			
 
				+                            bboxes.pop(i)
			
 
				+                            gt_poly.pop(i)
			
 
				+                            np.delete(is_crowd, i)
			
 
				+                            np.delete(gt_class, i)
			
 
				+                            np.delete(gt_bbox, i)
			
 
				+                        else:
			
 
				+                            gt_poly[i] = box['segmentation']
			
 
				+                        has_segmentation = True
			
 
				+
			
 
				+                if has_segmentation and not any(
			
 
				+                        gt_poly) and not self.allow_empty:
			
 
				+                    continue
			
 
				+
			
 
				+                if is_rbox_anno:
			
 
				+                    gt_rec = {
			
 
				+                        'is_crowd': is_crowd,
			
 
				+                        'gt_class': gt_class,
			
 
				+                        'gt_bbox': gt_bbox,
			
 
				+                        'gt_rbox': gt_rbox,
			
 
				+                        'gt_poly': gt_poly,
			
 
				+                    }
			
 
				+                else:
			
 
				+                    gt_rec = {
			
 
				+                        'is_crowd': is_crowd,
			
 
				+                        'gt_class': gt_class,
			
 
				+                        'gt_bbox': gt_bbox,
			
 
				+                        'gt_poly': gt_poly,
			
 
				+                    }
			
 
				+
			
 
				+                for k, v in gt_rec.items():
			
 
				+                    if k in self.data_fields:
			
 
				+                        coco_rec[k] = v
			
 
				+
			
 
				+                # TODO: remove load_semantic
			
 
				+                if self.load_semantic and 'semantic' in self.data_fields:
			
 
				+                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
			
 
				+                                            'train2017', im_fname[:-3] + 'png')
			
 
				+                    coco_rec.update({'semantic': seg_path})
			
 
				+
			
 
				+            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
			
 
				+                im_path, img_id, im_h, im_w))
			
 
				+            if is_empty:
			
 
				+                empty_records.append(coco_rec)
			
 
				+            else:
			
 
				+                records.append(coco_rec)
			
 
				+            ct += 1
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
			
 
				+        logger.debug('{} samples in file {}'.format(ct, anno_path))
			
 
				+        if self.allow_empty and len(empty_records) > 0:
			
 
				+            empty_records = self._sample_empty(empty_records, len(records))
			
 
				+            records += empty_records
			
 
				+        self.roidbs = records
			
--- a/paddlers/models/ppdet/data/source/dataset.py
+++ b/paddlers/models/ppdet/data/source/dataset.py
@@ -0,0 +1,197 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import numpy as np
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+from paddle.io import Dataset
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.utils.download import get_dataset_path
			
 
				+import copy
			
 
				+
			
 
				+
			
 
				+@serializable
			
 
				+class DetDataset(Dataset):
			
 
				+    """
			
 
				+    Load detection dataset.
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_dir (str): directory for images.
			
 
				+        anno_path (str): annotation file path.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+        use_default_label (bool): whether to load default label list.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1,
			
 
				+                 use_default_label=None,
			
 
				+                 **kwargs):
			
 
				+        super(DetDataset, self).__init__()
			
 
				+        self.dataset_dir = dataset_dir if dataset_dir is not None else ''
			
 
				+        self.anno_path = anno_path
			
 
				+        self.image_dir = image_dir if image_dir is not None else ''
			
 
				+        self.data_fields = data_fields
			
 
				+        self.sample_num = sample_num
			
 
				+        self.use_default_label = use_default_label
			
 
				+        self._epoch = 0
			
 
				+        self._curr_iter = 0
			
 
				+
			
 
				+    def __len__(self, ):
			
 
				+        return len(self.roidbs)
			
 
				+
			
 
				+    def __getitem__(self, idx):
			
 
				+        # data batch
			
 
				+        roidb = copy.deepcopy(self.roidbs[idx])
			
 
				+        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
			
 
				+            n = len(self.roidbs)
			
 
				+            idx = np.random.randint(n)
			
 
				+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
			
 
				+        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
			
 
				+            n = len(self.roidbs)
			
 
				+            idx = np.random.randint(n)
			
 
				+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
			
 
				+        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
			
 
				+            n = len(self.roidbs)
			
 
				+            roidb = [roidb, ] + [
			
 
				+                copy.deepcopy(self.roidbs[np.random.randint(n)])
			
 
				+                for _ in range(3)
			
 
				+            ]
			
 
				+        if isinstance(roidb, Sequence):
			
 
				+            for r in roidb:
			
 
				+                r['curr_iter'] = self._curr_iter
			
 
				+        else:
			
 
				+            roidb['curr_iter'] = self._curr_iter
			
 
				+        self._curr_iter += 1
			
 
				+
			
 
				+        return self.transform(roidb)
			
 
				+
			
 
				+    def check_or_download_dataset(self):
			
 
				+        self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
			
 
				+                                            self.image_dir)
			
 
				+
			
 
				+    def set_kwargs(self, **kwargs):
			
 
				+        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
			
 
				+        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
			
 
				+        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
			
 
				+
			
 
				+    def set_transform(self, transform):
			
 
				+        self.transform = transform
			
 
				+
			
 
				+    def set_epoch(self, epoch_id):
			
 
				+        self._epoch = epoch_id
			
 
				+
			
 
				+    def parse_dataset(self, ):
			
 
				+        raise NotImplementedError(
			
 
				+            "Need to implement parse_dataset method of Dataset")
			
 
				+
			
 
				+    def get_anno(self):
			
 
				+        if self.anno_path is None:
			
 
				+            return
			
 
				+        return os.path.join(self.dataset_dir, self.anno_path)
			
 
				+
			
 
				+
			
 
				+def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
			
 
				+    return f.lower().endswith(extensions)
			
 
				+
			
 
				+
			
 
				+def _make_dataset(dir):
			
 
				+    dir = os.path.expanduser(dir)
			
 
				+    if not os.path.isdir(dir):
			
 
				+        raise ('{} should be a dir'.format(dir))
			
 
				+    images = []
			
 
				+    for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
			
 
				+        for fname in sorted(fnames):
			
 
				+            path = os.path.join(root, fname)
			
 
				+            if _is_valid_file(path):
			
 
				+                images.append(path)
			
 
				+    return images
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class ImageFolder(DetDataset):
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 sample_num=-1,
			
 
				+                 use_default_label=None,
			
 
				+                 **kwargs):
			
 
				+        super(ImageFolder, self).__init__(
			
 
				+            dataset_dir,
			
 
				+            image_dir,
			
 
				+            anno_path,
			
 
				+            sample_num=sample_num,
			
 
				+            use_default_label=use_default_label)
			
 
				+        self._imid2path = {}
			
 
				+        self.roidbs = None
			
 
				+        self.sample_num = sample_num
			
 
				+
			
 
				+    def check_or_download_dataset(self):
			
 
				+        if self.dataset_dir:
			
 
				+            # NOTE: ImageFolder is only used for prediction, in
			
 
				+            #       infer mode, image_dir is set by set_images
			
 
				+            #       so we only check anno_path here
			
 
				+            self.dataset_dir = get_dataset_path(self.dataset_dir,
			
 
				+                                                self.anno_path, None)
			
 
				+
			
 
				+    def parse_dataset(self, ):
			
 
				+        if not self.roidbs:
			
 
				+            self.roidbs = self._load_images()
			
 
				+
			
 
				+    def _parse(self):
			
 
				+        image_dir = self.image_dir
			
 
				+        if not isinstance(image_dir, Sequence):
			
 
				+            image_dir = [image_dir]
			
 
				+        images = []
			
 
				+        for im_dir in image_dir:
			
 
				+            if os.path.isdir(im_dir):
			
 
				+                im_dir = os.path.join(self.dataset_dir, im_dir)
			
 
				+                images.extend(_make_dataset(im_dir))
			
 
				+            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
			
 
				+                images.append(im_dir)
			
 
				+        return images
			
 
				+
			
 
				+    def _load_images(self):
			
 
				+        images = self._parse()
			
 
				+        ct = 0
			
 
				+        records = []
			
 
				+        for image in images:
			
 
				+            assert image != '' and os.path.isfile(image), \
			
 
				+                    "Image {} not found".format(image)
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+            rec = {'im_id': np.array([ct]), 'im_file': image}
			
 
				+            self._imid2path[ct] = image
			
 
				+            ct += 1
			
 
				+            records.append(rec)
			
 
				+        assert len(records) > 0, "No image file found"
			
 
				+        return records
			
 
				+
			
 
				+    def get_imid2path(self):
			
 
				+        return self._imid2path
			
 
				+
			
 
				+    def set_images(self, images):
			
 
				+        self.image_dir = images
			
 
				+        self.roidbs = self._load_images()
			
--- a/paddlers/models/ppdet/data/source/keypoint_coco.py
+++ b/paddlers/models/ppdet/data/source/keypoint_coco.py
@@ -0,0 +1,669 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+"""
			
 
				+this code is based on https://github.com/open-mmlab/mmpose
			
 
				+"""
			
 
				+import os
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import json
			
 
				+import copy
			
 
				+import pycocotools
			
 
				+from pycocotools.coco import COCO
			
 
				+from .dataset import DetDataset
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+
			
 
				+
			
 
				+@serializable
			
 
				+class KeypointBottomUpBaseDataset(DetDataset):
			
 
				+    """Base class for bottom-up datasets.
			
 
				+
			
 
				+    All datasets should subclass it.
			
 
				+    All subclasses should overwrite:
			
 
				+        Methods:`_get_imganno`
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+            Default: None.
			
 
				+        num_joints (int): keypoint numbers
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+        shard (list): [rank, worldsize], the distributed env params
			
 
				+        test_mode (bool): Store True when building test or
			
 
				+            validation dataset. Default: False.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 transform=[],
			
 
				+                 shard=[0, 1],
			
 
				+                 test_mode=False):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path)
			
 
				+        self.image_info = {}
			
 
				+        self.ann_info = {}
			
 
				+
			
 
				+        self.img_prefix = os.path.join(dataset_dir, image_dir)
			
 
				+        self.transform = transform
			
 
				+        self.test_mode = test_mode
			
 
				+
			
 
				+        self.ann_info['num_joints'] = num_joints
			
 
				+        self.img_ids = []
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        pass
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        """Get dataset length."""
			
 
				+        return len(self.img_ids)
			
 
				+
			
 
				+    def _get_imganno(self, idx):
			
 
				+        """Get anno for a single image."""
			
 
				+        raise NotImplementedError
			
 
				+
			
 
				+    def __getitem__(self, idx):
			
 
				+        """Prepare image for training given the index."""
			
 
				+        records = copy.deepcopy(self._get_imganno(idx))
			
 
				+        records['image'] = cv2.imread(records['image_file'])
			
 
				+        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
			
 
				+        records['mask'] = (records['mask'] + 0).astype('uint8')
			
 
				+        records = self.transform(records)
			
 
				+        return records
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        return
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
			
 
				+    """COCO dataset for bottom-up pose estimation.
			
 
				+
			
 
				+    The dataset loads raw features and apply specified transforms
			
 
				+    to return a dict containing the image tensors and other information.
			
 
				+
			
 
				+    COCO keypoint indexes::
			
 
				+
			
 
				+        0: 'nose',
			
 
				+        1: 'left_eye',
			
 
				+        2: 'right_eye',
			
 
				+        3: 'left_ear',
			
 
				+        4: 'right_ear',
			
 
				+        5: 'left_shoulder',
			
 
				+        6: 'right_shoulder',
			
 
				+        7: 'left_elbow',
			
 
				+        8: 'right_elbow',
			
 
				+        9: 'left_wrist',
			
 
				+        10: 'right_wrist',
			
 
				+        11: 'left_hip',
			
 
				+        12: 'right_hip',
			
 
				+        13: 'left_knee',
			
 
				+        14: 'right_knee',
			
 
				+        15: 'left_ankle',
			
 
				+        16: 'right_ankle'
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+            Default: None.
			
 
				+        num_joints (int): keypoint numbers
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+        shard (list): [rank, worldsize], the distributed env params
			
 
				+        test_mode (bool): Store True when building test or
			
 
				+            validation dataset. Default: False.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 transform=[],
			
 
				+                 shard=[0, 1],
			
 
				+                 test_mode=False):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
			
 
				+                         transform, shard, test_mode)
			
 
				+
			
 
				+        self.ann_file = os.path.join(dataset_dir, anno_path)
			
 
				+        self.shard = shard
			
 
				+        self.test_mode = test_mode
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        self.coco = COCO(self.ann_file)
			
 
				+
			
 
				+        self.img_ids = self.coco.getImgIds()
			
 
				+        if not self.test_mode:
			
 
				+            self.img_ids = [
			
 
				+                img_id for img_id in self.img_ids
			
 
				+                if len(self.coco.getAnnIds(
			
 
				+                    imgIds=img_id, iscrowd=None)) > 0
			
 
				+            ]
			
 
				+        blocknum = int(len(self.img_ids) / self.shard[1])
			
 
				+        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
			
 
				+            self.shard[0] + 1))]
			
 
				+        self.num_images = len(self.img_ids)
			
 
				+        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
			
 
				+        self.dataset_name = 'coco'
			
 
				+
			
 
				+        cat_ids = self.coco.getCatIds()
			
 
				+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
			
 
				+        print('=> num_images: {}'.format(self.num_images))
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _get_mapping_id_name(imgs):
			
 
				+        """
			
 
				+        Args:
			
 
				+            imgs (dict): dict of image info.
			
 
				+
			
 
				+        Returns:
			
 
				+            tuple: Image name & id mapping dicts.
			
 
				+
			
 
				+            - id2name (dict): Mapping image id to name.
			
 
				+            - name2id (dict): Mapping image name to id.
			
 
				+        """
			
 
				+        id2name = {}
			
 
				+        name2id = {}
			
 
				+        for image_id, image in imgs.items():
			
 
				+            file_name = image['file_name']
			
 
				+            id2name[image_id] = file_name
			
 
				+            name2id[file_name] = image_id
			
 
				+
			
 
				+        return id2name, name2id
			
 
				+
			
 
				+    def _get_imganno(self, idx):
			
 
				+        """Get anno for a single image.
			
 
				+
			
 
				+        Args:
			
 
				+            idx (int): image idx
			
 
				+
			
 
				+        Returns:
			
 
				+            dict: info for model training
			
 
				+        """
			
 
				+        coco = self.coco
			
 
				+        img_id = self.img_ids[idx]
			
 
				+        ann_ids = coco.getAnnIds(imgIds=img_id)
			
 
				+        anno = coco.loadAnns(ann_ids)
			
 
				+
			
 
				+        mask = self._get_mask(anno, idx)
			
 
				+        anno = [
			
 
				+            obj for obj in anno
			
 
				+            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
			
 
				+        ]
			
 
				+
			
 
				+        joints, orgsize = self._get_joints(anno, idx)
			
 
				+
			
 
				+        db_rec = {}
			
 
				+        db_rec['im_id'] = img_id
			
 
				+        db_rec['image_file'] = os.path.join(self.img_prefix,
			
 
				+                                            self.id2name[img_id])
			
 
				+        db_rec['mask'] = mask
			
 
				+        db_rec['joints'] = joints
			
 
				+        db_rec['im_shape'] = orgsize
			
 
				+
			
 
				+        return db_rec
			
 
				+
			
 
				+    def _get_joints(self, anno, idx):
			
 
				+        """Get joints for all people in an image."""
			
 
				+        num_people = len(anno)
			
 
				+
			
 
				+        joints = np.zeros(
			
 
				+            (num_people, self.ann_info['num_joints'], 3), dtype=np.float32)
			
 
				+
			
 
				+        for i, obj in enumerate(anno):
			
 
				+            joints[i, :self.ann_info['num_joints'], :3] = \
			
 
				+                np.array(obj['keypoints']).reshape([-1, 3])
			
 
				+
			
 
				+        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
			
 
				+        joints[..., 0] /= img_info['width']
			
 
				+        joints[..., 1] /= img_info['height']
			
 
				+        orgsize = np.array([img_info['height'], img_info['width']])
			
 
				+
			
 
				+        return joints, orgsize
			
 
				+
			
 
				+    def _get_mask(self, anno, idx):
			
 
				+        """Get ignore masks to mask out losses."""
			
 
				+        coco = self.coco
			
 
				+        img_info = coco.loadImgs(self.img_ids[idx])[0]
			
 
				+
			
 
				+        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
			
 
				+
			
 
				+        for obj in anno:
			
 
				+            if 'segmentation' in obj:
			
 
				+                if obj['iscrowd']:
			
 
				+                    rle = pycocotools.mask.frPyObjects(obj['segmentation'],
			
 
				+                                                       img_info['height'],
			
 
				+                                                       img_info['width'])
			
 
				+                    m += pycocotools.mask.decode(rle)
			
 
				+                elif obj['num_keypoints'] == 0:
			
 
				+                    rles = pycocotools.mask.frPyObjects(obj['segmentation'],
			
 
				+                                                        img_info['height'],
			
 
				+                                                        img_info['width'])
			
 
				+                    for rle in rles:
			
 
				+                        m += pycocotools.mask.decode(rle)
			
 
				+
			
 
				+        return m < 0.5
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
			
 
				+    """CrowdPose dataset for bottom-up pose estimation.
			
 
				+
			
 
				+    The dataset loads raw features and apply specified transforms
			
 
				+    to return a dict containing the image tensors and other information.
			
 
				+
			
 
				+    CrowdPose keypoint indexes::
			
 
				+
			
 
				+        0: 'left_shoulder',
			
 
				+        1: 'right_shoulder',
			
 
				+        2: 'left_elbow',
			
 
				+        3: 'right_elbow',
			
 
				+        4: 'left_wrist',
			
 
				+        5: 'right_wrist',
			
 
				+        6: 'left_hip',
			
 
				+        7: 'right_hip',
			
 
				+        8: 'left_knee',
			
 
				+        9: 'right_knee',
			
 
				+        10: 'left_ankle',
			
 
				+        11: 'right_ankle',
			
 
				+        12: 'top_head',
			
 
				+        13: 'neck'
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+            Default: None.
			
 
				+        num_joints (int): keypoint numbers
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+        shard (list): [rank, worldsize], the distributed env params
			
 
				+        test_mode (bool): Store True when building test or
			
 
				+            validation dataset. Default: False.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 transform=[],
			
 
				+                 shard=[0, 1],
			
 
				+                 test_mode=False):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
			
 
				+                         transform, shard, test_mode)
			
 
				+
			
 
				+        self.ann_file = os.path.join(dataset_dir, anno_path)
			
 
				+        self.shard = shard
			
 
				+        self.test_mode = test_mode
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        self.coco = COCO(self.ann_file)
			
 
				+
			
 
				+        self.img_ids = self.coco.getImgIds()
			
 
				+        if not self.test_mode:
			
 
				+            self.img_ids = [
			
 
				+                img_id for img_id in self.img_ids
			
 
				+                if len(self.coco.getAnnIds(
			
 
				+                    imgIds=img_id, iscrowd=None)) > 0
			
 
				+            ]
			
 
				+        blocknum = int(len(self.img_ids) / self.shard[1])
			
 
				+        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
			
 
				+            self.shard[0] + 1))]
			
 
				+        self.num_images = len(self.img_ids)
			
 
				+        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
			
 
				+
			
 
				+        self.dataset_name = 'crowdpose'
			
 
				+        print('=> num_images: {}'.format(self.num_images))
			
 
				+
			
 
				+
			
 
				+@serializable
			
 
				+class KeypointTopDownBaseDataset(DetDataset):
			
 
				+    """Base class for top_down datasets.
			
 
				+
			
 
				+    All datasets should subclass it.
			
 
				+    All subclasses should overwrite:
			
 
				+        Methods:`_get_db`
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        num_joints (int): keypoint numbers
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 transform=[]):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path)
			
 
				+        self.image_info = {}
			
 
				+        self.ann_info = {}
			
 
				+
			
 
				+        self.img_prefix = os.path.join(dataset_dir, image_dir)
			
 
				+        self.transform = transform
			
 
				+
			
 
				+        self.ann_info['num_joints'] = num_joints
			
 
				+        self.db = []
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        """Get dataset length."""
			
 
				+        return len(self.db)
			
 
				+
			
 
				+    def _get_db(self):
			
 
				+        """Get a sample"""
			
 
				+        raise NotImplementedError
			
 
				+
			
 
				+    def __getitem__(self, idx):
			
 
				+        """Prepare sample for training given the index."""
			
 
				+        records = copy.deepcopy(self.db[idx])
			
 
				+        records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
			
 
				+                                      cv2.IMREAD_IGNORE_ORIENTATION)
			
 
				+        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
			
 
				+        records['score'] = records['score'] if 'score' in records else 1
			
 
				+        records = self.transform(records)
			
 
				+        # print('records', records)
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
			
 
				+    """COCO dataset for top-down pose estimation.
			
 
				+
			
 
				+    The dataset loads raw features and apply specified transforms
			
 
				+    to return a dict containing the image tensors and other information.
			
 
				+
			
 
				+    COCO keypoint indexes:
			
 
				+
			
 
				+        0: 'nose',
			
 
				+        1: 'left_eye',
			
 
				+        2: 'right_eye',
			
 
				+        3: 'left_ear',
			
 
				+        4: 'right_ear',
			
 
				+        5: 'left_shoulder',
			
 
				+        6: 'right_shoulder',
			
 
				+        7: 'left_elbow',
			
 
				+        8: 'right_elbow',
			
 
				+        9: 'left_wrist',
			
 
				+        10: 'right_wrist',
			
 
				+        11: 'left_hip',
			
 
				+        12: 'right_hip',
			
 
				+        13: 'left_knee',
			
 
				+        14: 'right_knee',
			
 
				+        15: 'left_ankle',
			
 
				+        16: 'right_ankle'
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        num_joints (int): Keypoint numbers
			
 
				+        trainsize (list):[w, h] Image target size
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+        bbox_file (str): Path to a detection bbox file
			
 
				+            Default: None.
			
 
				+        use_gt_bbox (bool): Whether to use ground truth bbox
			
 
				+            Default: True.
			
 
				+        pixel_std (int): The pixel std of the scale
			
 
				+            Default: 200.
			
 
				+        image_thre (float): The threshold to filter the detection box
			
 
				+            Default: 0.0.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 trainsize,
			
 
				+                 transform=[],
			
 
				+                 bbox_file=None,
			
 
				+                 use_gt_bbox=True,
			
 
				+                 pixel_std=200,
			
 
				+                 image_thre=0.0):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
			
 
				+                         transform)
			
 
				+
			
 
				+        self.bbox_file = bbox_file
			
 
				+        self.use_gt_bbox = use_gt_bbox
			
 
				+        self.trainsize = trainsize
			
 
				+        self.pixel_std = pixel_std
			
 
				+        self.image_thre = image_thre
			
 
				+        self.dataset_name = 'coco'
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        if self.use_gt_bbox:
			
 
				+            self.db = self._load_coco_keypoint_annotations()
			
 
				+        else:
			
 
				+            self.db = self._load_coco_person_detection_results()
			
 
				+
			
 
				+    def _load_coco_keypoint_annotations(self):
			
 
				+        coco = COCO(self.get_anno())
			
 
				+        img_ids = coco.getImgIds()
			
 
				+        gt_db = []
			
 
				+        for index in img_ids:
			
 
				+            im_ann = coco.loadImgs(index)[0]
			
 
				+            width = im_ann['width']
			
 
				+            height = im_ann['height']
			
 
				+            file_name = im_ann['file_name']
			
 
				+            im_id = int(im_ann["id"])
			
 
				+
			
 
				+            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
			
 
				+            objs = coco.loadAnns(annIds)
			
 
				+
			
 
				+            valid_objs = []
			
 
				+            for obj in objs:
			
 
				+                x, y, w, h = obj['bbox']
			
 
				+                x1 = np.max((0, x))
			
 
				+                y1 = np.max((0, y))
			
 
				+                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
			
 
				+                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
			
 
				+                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
			
 
				+                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
			
 
				+                    valid_objs.append(obj)
			
 
				+            objs = valid_objs
			
 
				+
			
 
				+            rec = []
			
 
				+            for obj in objs:
			
 
				+                if max(obj['keypoints']) == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                joints = np.zeros(
			
 
				+                    (self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+                joints_vis = np.zeros(
			
 
				+                    (self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+                for ipt in range(self.ann_info['num_joints']):
			
 
				+                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
			
 
				+                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
			
 
				+                    joints[ipt, 2] = 0
			
 
				+                    t_vis = obj['keypoints'][ipt * 3 + 2]
			
 
				+                    if t_vis > 1:
			
 
				+                        t_vis = 1
			
 
				+                    joints_vis[ipt, 0] = t_vis
			
 
				+                    joints_vis[ipt, 1] = t_vis
			
 
				+                    joints_vis[ipt, 2] = 0
			
 
				+
			
 
				+                center, scale = self._box2cs(obj['clean_bbox'][:4])
			
 
				+                rec.append({
			
 
				+                    'image_file': os.path.join(self.img_prefix, file_name),
			
 
				+                    'center': center,
			
 
				+                    'scale': scale,
			
 
				+                    'joints': joints,
			
 
				+                    'joints_vis': joints_vis,
			
 
				+                    'im_id': im_id,
			
 
				+                })
			
 
				+            gt_db.extend(rec)
			
 
				+
			
 
				+        return gt_db
			
 
				+
			
 
				+    def _box2cs(self, box):
			
 
				+        x, y, w, h = box[:4]
			
 
				+        center = np.zeros((2), dtype=np.float32)
			
 
				+        center[0] = x + w * 0.5
			
 
				+        center[1] = y + h * 0.5
			
 
				+        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
			
 
				+
			
 
				+        if w > aspect_ratio * h:
			
 
				+            h = w * 1.0 / aspect_ratio
			
 
				+        elif w < aspect_ratio * h:
			
 
				+            w = h * aspect_ratio
			
 
				+        scale = np.array(
			
 
				+            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
			
 
				+            dtype=np.float32)
			
 
				+        if center[0] != -1:
			
 
				+            scale = scale * 1.25
			
 
				+
			
 
				+        return center, scale
			
 
				+
			
 
				+    def _load_coco_person_detection_results(self):
			
 
				+        all_boxes = None
			
 
				+        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
			
 
				+        with open(bbox_file_path, 'r') as f:
			
 
				+            all_boxes = json.load(f)
			
 
				+
			
 
				+        if not all_boxes:
			
 
				+            print('=> Load %s fail!' % bbox_file_path)
			
 
				+            return None
			
 
				+
			
 
				+        kpt_db = []
			
 
				+        for n_img in range(0, len(all_boxes)):
			
 
				+            det_res = all_boxes[n_img]
			
 
				+            if det_res['category_id'] != 1:
			
 
				+                continue
			
 
				+            file_name = det_res[
			
 
				+                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
			
 
				+                    'image_id']
			
 
				+            img_name = os.path.join(self.img_prefix, file_name)
			
 
				+            box = det_res['bbox']
			
 
				+            score = det_res['score']
			
 
				+            im_id = int(det_res['image_id'])
			
 
				+
			
 
				+            if score < self.image_thre:
			
 
				+                continue
			
 
				+
			
 
				+            center, scale = self._box2cs(box)
			
 
				+            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+            joints_vis = np.ones(
			
 
				+                (self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+            kpt_db.append({
			
 
				+                'image_file': img_name,
			
 
				+                'im_id': im_id,
			
 
				+                'center': center,
			
 
				+                'scale': scale,
			
 
				+                'score': score,
			
 
				+                'joints': joints,
			
 
				+                'joints_vis': joints_vis,
			
 
				+            })
			
 
				+
			
 
				+        return kpt_db
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
			
 
				+    """MPII dataset for topdown pose estimation.
			
 
				+
			
 
				+    The dataset loads raw features and apply specified transforms
			
 
				+    to return a dict containing the image tensors and other information.
			
 
				+
			
 
				+    MPII keypoint indexes::
			
 
				+
			
 
				+        0: 'right_ankle',
			
 
				+        1: 'right_knee',
			
 
				+        2: 'right_hip',
			
 
				+        3: 'left_hip',
			
 
				+        4: 'left_knee',
			
 
				+        5: 'left_ankle',
			
 
				+        6: 'pelvis',
			
 
				+        7: 'thorax',
			
 
				+        8: 'upper_neck',
			
 
				+        9: 'head_top',
			
 
				+        10: 'right_wrist',
			
 
				+        11: 'right_elbow',
			
 
				+        12: 'right_shoulder',
			
 
				+        13: 'left_shoulder',
			
 
				+        14: 'left_elbow',
			
 
				+        15: 'left_wrist',
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): Root path to the dataset.
			
 
				+        image_dir (str): Path to a directory where images are held.
			
 
				+        anno_path (str): Relative path to the annotation file.
			
 
				+        num_joints (int): Keypoint numbers
			
 
				+        trainsize (list):[w, h] Image target size
			
 
				+        transform (composed(operators)): A sequence of data transforms.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir,
			
 
				+                 image_dir,
			
 
				+                 anno_path,
			
 
				+                 num_joints,
			
 
				+                 transform=[]):
			
 
				+        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
			
 
				+                         transform)
			
 
				+
			
 
				+        self.dataset_name = 'mpii'
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        with open(self.get_anno()) as anno_file:
			
 
				+            anno = json.load(anno_file)
			
 
				+
			
 
				+        gt_db = []
			
 
				+        for a in anno:
			
 
				+            image_name = a['image']
			
 
				+            im_id = a['image_id'] if 'image_id' in a else int(
			
 
				+                os.path.splitext(image_name)[0])
			
 
				+
			
 
				+            c = np.array(a['center'], dtype=np.float)
			
 
				+            s = np.array([a['scale'], a['scale']], dtype=np.float)
			
 
				+
			
 
				+            # Adjust center/scale slightly to avoid cropping limbs
			
 
				+            if c[0] != -1:
			
 
				+                c[1] = c[1] + 15 * s[1]
			
 
				+                s = s * 1.25
			
 
				+            c = c - 1
			
 
				+
			
 
				+            joints = np.zeros((self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+            joints_vis = np.zeros(
			
 
				+                (self.ann_info['num_joints'], 3), dtype=np.float)
			
 
				+            if 'joints' in a:
			
 
				+                joints_ = np.array(a['joints'])
			
 
				+                joints_[:, 0:2] = joints_[:, 0:2] - 1
			
 
				+                joints_vis_ = np.array(a['joints_vis'])
			
 
				+                assert len(joints_) == self.ann_info[
			
 
				+                    'num_joints'], 'joint num diff: {} vs {}'.format(
			
 
				+                        len(joints_), self.ann_info['num_joints'])
			
 
				+
			
 
				+                joints[:, 0:2] = joints_[:, 0:2]
			
 
				+                joints_vis[:, 0] = joints_vis_[:]
			
 
				+                joints_vis[:, 1] = joints_vis_[:]
			
 
				+
			
 
				+            gt_db.append({
			
 
				+                'image_file': os.path.join(self.img_prefix, image_name),
			
 
				+                'im_id': im_id,
			
 
				+                'center': c,
			
 
				+                'scale': s,
			
 
				+                'joints': joints,
			
 
				+                'joints_vis': joints_vis
			
 
				+            })
			
 
				+        print("number length: {}".format(len(gt_db)))
			
 
				+        self.db = gt_db
			
--- a/paddlers/models/ppdet/data/source/mot.py
+++ b/paddlers/models/ppdet/data/source/mot.py
@@ -0,0 +1,636 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import cv2
			
 
				+import glob
			
 
				+import numpy as np
			
 
				+from collections import OrderedDict, defaultdict
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+from .dataset import DetDataset, _make_dataset, _is_valid_file
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class MOTDataSet(DetDataset):
			
 
				+    """
			
 
				+    Load dataset with MOT format, only support single class MOT.
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_lists (str|list): mot data image lists, muiti-source mot dataset.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+
			
 
				+    Notes:
			
 
				+        MOT datasets root directory following this:
			
 
				+            dataset/mot
			
 
				+            |——————image_lists
			
 
				+            |        |——————caltech.train
			
 
				+            |        |——————caltech.val
			
 
				+            |        |——————mot16.train
			
 
				+            |        |——————mot17.train
			
 
				+            |        ......
			
 
				+            |——————Caltech
			
 
				+            |——————MOT17
			
 
				+            |——————......
			
 
				+
			
 
				+        All the MOT datasets have the following structure:
			
 
				+            Caltech
			
 
				+            |——————images
			
 
				+            |        └——————00001.jpg
			
 
				+            |        |—————— ...
			
 
				+            |        └——————0000N.jpg
			
 
				+            └——————labels_with_ids
			
 
				+                        └——————00001.txt
			
 
				+                        |—————— ...
			
 
				+                        └——————0000N.txt
			
 
				+            or
			
 
				+
			
 
				+            MOT17
			
 
				+            |——————images
			
 
				+            |        └——————train
			
 
				+            |        └——————test
			
 
				+            └——————labels_with_ids
			
 
				+                        └——————train
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_lists=[],
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1):
			
 
				+        super(MOTDataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num)
			
 
				+        self.dataset_dir = dataset_dir
			
 
				+        self.image_lists = image_lists
			
 
				+        if isinstance(self.image_lists, str):
			
 
				+            self.image_lists = [self.image_lists]
			
 
				+        self.roidbs = None
			
 
				+        self.cname2cid = None
			
 
				+
			
 
				+    def get_anno(self):
			
 
				+        if self.image_lists == []:
			
 
				+            return
			
 
				+        # only used to get categories and metric
			
 
				+        # only check first data, but the label_list of all data should be same.
			
 
				+        first_mot_data = self.image_lists[0].split('.')[0]
			
 
				+        anno_file = os.path.join(self.dataset_dir, first_mot_data,
			
 
				+                                 'label_list.txt')
			
 
				+        return anno_file
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        self.img_files = OrderedDict()
			
 
				+        self.img_start_index = OrderedDict()
			
 
				+        self.label_files = OrderedDict()
			
 
				+        self.tid_num = OrderedDict()
			
 
				+        self.tid_start_index = OrderedDict()
			
 
				+
			
 
				+        img_index = 0
			
 
				+        for data_name in self.image_lists:
			
 
				+            # check every data image list
			
 
				+            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
			
 
				+            assert os.path.isdir(image_lists_dir), \
			
 
				+                "The {} is not a directory.".format(image_lists_dir)
			
 
				+
			
 
				+            list_path = os.path.join(image_lists_dir, data_name)
			
 
				+            assert os.path.exists(list_path), \
			
 
				+                "The list path {} does not exist.".format(list_path)
			
 
				+
			
 
				+            # record img_files, filter out empty ones
			
 
				+            with open(list_path, 'r') as file:
			
 
				+                self.img_files[data_name] = file.readlines()
			
 
				+                self.img_files[data_name] = [
			
 
				+                    os.path.join(self.dataset_dir, x.strip())
			
 
				+                    for x in self.img_files[data_name]
			
 
				+                ]
			
 
				+                self.img_files[data_name] = list(
			
 
				+                    filter(lambda x: len(x) > 0, self.img_files[data_name]))
			
 
				+
			
 
				+                self.img_start_index[data_name] = img_index
			
 
				+                img_index += len(self.img_files[data_name])
			
 
				+
			
 
				+            # record label_files
			
 
				+            self.label_files[data_name] = [
			
 
				+                x.replace('images', 'labels_with_ids').replace(
			
 
				+                    '.png', '.txt').replace('.jpg', '.txt')
			
 
				+                for x in self.img_files[data_name]
			
 
				+            ]
			
 
				+
			
 
				+        for data_name, label_paths in self.label_files.items():
			
 
				+            max_index = -1
			
 
				+            for lp in label_paths:
			
 
				+                lb = np.loadtxt(lp)
			
 
				+                if len(lb) < 1:
			
 
				+                    continue
			
 
				+                if len(lb.shape) < 2:
			
 
				+                    img_max = lb[1]
			
 
				+                else:
			
 
				+                    img_max = np.max(lb[:, 1])
			
 
				+                if img_max > max_index:
			
 
				+                    max_index = img_max
			
 
				+            self.tid_num[data_name] = int(max_index + 1)
			
 
				+
			
 
				+        last_index = 0
			
 
				+        for i, (k, v) in enumerate(self.tid_num.items()):
			
 
				+            self.tid_start_index[k] = last_index
			
 
				+            last_index += v
			
 
				+
			
 
				+        self.num_identities_dict = defaultdict(int)
			
 
				+        self.num_identities_dict[0] = int(last_index + 1)  # single class
			
 
				+        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
			
 
				+        self.total_imgs = sum(self.num_imgs_each_data)
			
 
				+
			
 
				+        logger.info('MOT dataset summary: ')
			
 
				+        logger.info(self.tid_num)
			
 
				+        logger.info('Total images: {}'.format(self.total_imgs))
			
 
				+        logger.info('Image start index: {}'.format(self.img_start_index))
			
 
				+        logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
			
 
				+        logger.info('Identity start index: {}'.format(self.tid_start_index))
			
 
				+
			
 
				+        records = []
			
 
				+        cname2cid = mot_label()
			
 
				+
			
 
				+        for img_index in range(self.total_imgs):
			
 
				+            for i, (k, v) in enumerate(self.img_start_index.items()):
			
 
				+                if img_index >= v:
			
 
				+                    data_name = list(self.label_files.keys())[i]
			
 
				+                    start_index = v
			
 
				+            img_file = self.img_files[data_name][img_index - start_index]
			
 
				+            lbl_file = self.label_files[data_name][img_index - start_index]
			
 
				+
			
 
				+            if not os.path.exists(img_file):
			
 
				+                logger.warning(
			
 
				+                    'Illegal image file: {}, and it will be ignored'.format(
			
 
				+                        img_file))
			
 
				+                continue
			
 
				+            if not os.path.isfile(lbl_file):
			
 
				+                logger.warning(
			
 
				+                    'Illegal label file: {}, and it will be ignored'.format(
			
 
				+                        lbl_file))
			
 
				+                continue
			
 
				+
			
 
				+            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
			
 
				+            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
			
 
				+
			
 
				+            cx, cy = labels[:, 2], labels[:, 3]
			
 
				+            w, h = labels[:, 4], labels[:, 5]
			
 
				+            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
			
 
				+            gt_class = labels[:, 0:1].astype('int32')
			
 
				+            gt_score = np.ones((len(labels), 1)).astype('float32')
			
 
				+            gt_ide = labels[:, 1:2].astype('int32')
			
 
				+            for i, _ in enumerate(gt_ide):
			
 
				+                if gt_ide[i] > -1:
			
 
				+                    gt_ide[i] += self.tid_start_index[data_name]
			
 
				+
			
 
				+            mot_rec = {
			
 
				+                'im_file': img_file,
			
 
				+                'im_id': img_index,
			
 
				+            } if 'image' in self.data_fields else {}
			
 
				+
			
 
				+            gt_rec = {
			
 
				+                'gt_class': gt_class,
			
 
				+                'gt_score': gt_score,
			
 
				+                'gt_bbox': gt_bbox,
			
 
				+                'gt_ide': gt_ide,
			
 
				+            }
			
 
				+
			
 
				+            for k, v in gt_rec.items():
			
 
				+                if k in self.data_fields:
			
 
				+                    mot_rec[k] = v
			
 
				+
			
 
				+            records.append(mot_rec)
			
 
				+            if self.sample_num > 0 and img_index >= self.sample_num:
			
 
				+                break
			
 
				+        assert len(records) > 0, 'not found any mot record in %s' % (
			
 
				+            self.image_lists)
			
 
				+        self.roidbs, self.cname2cid = records, cname2cid
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class MCMOTDataSet(DetDataset):
			
 
				+    """
			
 
				+    Load dataset with MOT format, support multi-class MOT.
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_lists (list(str)): mcmot data image lists, muiti-source mcmot dataset.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        label_list (str): if use_default_label is False, will load
			
 
				+            mapping between category and class index.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+
			
 
				+    Notes:
			
 
				+        MCMOT datasets root directory following this:
			
 
				+            dataset/mot
			
 
				+            |——————image_lists
			
 
				+            |        |——————visdrone_mcmot.train
			
 
				+            |        |——————visdrone_mcmot.val
			
 
				+            visdrone_mcmot
			
 
				+            |——————images
			
 
				+            |        └——————train
			
 
				+            |        └——————val
			
 
				+            └——————labels_with_ids
			
 
				+                        └——————train
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_lists=[],
			
 
				+                 data_fields=['image'],
			
 
				+                 label_list=None,
			
 
				+                 sample_num=-1):
			
 
				+        super(MCMOTDataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num)
			
 
				+        self.dataset_dir = dataset_dir
			
 
				+        self.image_lists = image_lists
			
 
				+        if isinstance(self.image_lists, str):
			
 
				+            self.image_lists = [self.image_lists]
			
 
				+        self.label_list = label_list
			
 
				+        self.roidbs = None
			
 
				+        self.cname2cid = None
			
 
				+
			
 
				+    def get_anno(self):
			
 
				+        if self.image_lists == []:
			
 
				+            return
			
 
				+        # only used to get categories and metric
			
 
				+        # only check first data, but the label_list of all data should be same.
			
 
				+        first_mot_data = self.image_lists[0].split('.')[0]
			
 
				+        anno_file = os.path.join(self.dataset_dir, first_mot_data,
			
 
				+                                 'label_list.txt')
			
 
				+        return anno_file
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        self.img_files = OrderedDict()
			
 
				+        self.img_start_index = OrderedDict()
			
 
				+        self.label_files = OrderedDict()
			
 
				+        self.tid_num = OrderedDict()
			
 
				+        self.tid_start_idx_of_cls_ids = defaultdict(dict)  # for MCMOT
			
 
				+
			
 
				+        img_index = 0
			
 
				+        for data_name in self.image_lists:
			
 
				+            # check every data image list
			
 
				+            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
			
 
				+            assert os.path.isdir(image_lists_dir), \
			
 
				+                "The {} is not a directory.".format(image_lists_dir)
			
 
				+
			
 
				+            list_path = os.path.join(image_lists_dir, data_name)
			
 
				+            assert os.path.exists(list_path), \
			
 
				+                "The list path {} does not exist.".format(list_path)
			
 
				+
			
 
				+            # record img_files, filter out empty ones
			
 
				+            with open(list_path, 'r') as file:
			
 
				+                self.img_files[data_name] = file.readlines()
			
 
				+                self.img_files[data_name] = [
			
 
				+                    os.path.join(self.dataset_dir, x.strip())
			
 
				+                    for x in self.img_files[data_name]
			
 
				+                ]
			
 
				+                self.img_files[data_name] = list(
			
 
				+                    filter(lambda x: len(x) > 0, self.img_files[data_name]))
			
 
				+
			
 
				+                self.img_start_index[data_name] = img_index
			
 
				+                img_index += len(self.img_files[data_name])
			
 
				+
			
 
				+            # record label_files
			
 
				+            self.label_files[data_name] = [
			
 
				+                x.replace('images', 'labels_with_ids').replace(
			
 
				+                    '.png', '.txt').replace('.jpg', '.txt')
			
 
				+                for x in self.img_files[data_name]
			
 
				+            ]
			
 
				+
			
 
				+        for data_name, label_paths in self.label_files.items():
			
 
				+            # using max_ids_dict rather than max_index
			
 
				+            max_ids_dict = defaultdict(int)
			
 
				+            for lp in label_paths:
			
 
				+                lb = np.loadtxt(lp)
			
 
				+                if len(lb) < 1:
			
 
				+                    continue
			
 
				+                lb = lb.reshape(-1, 6)
			
 
				+                for item in lb:
			
 
				+                    if item[1] > max_ids_dict[int(item[0])]:
			
 
				+                        # item[0]: cls_id
			
 
				+                        # item[1]: track id
			
 
				+                        max_ids_dict[int(item[0])] = int(item[1])
			
 
				+            # track id number
			
 
				+            self.tid_num[data_name] = max_ids_dict
			
 
				+
			
 
				+        last_idx_dict = defaultdict(int)
			
 
				+        for i, (k, v) in enumerate(self.tid_num.items()):  # each sub dataset
			
 
				+            for cls_id, id_num in v.items():  # v is a max_ids_dict
			
 
				+                self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[
			
 
				+                    cls_id]
			
 
				+                last_idx_dict[cls_id] += id_num
			
 
				+
			
 
				+        self.num_identities_dict = defaultdict(int)
			
 
				+        for k, v in last_idx_dict.items():
			
 
				+            self.num_identities_dict[k] = int(v)  # total ids of each category
			
 
				+
			
 
				+        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
			
 
				+        self.total_imgs = sum(self.num_imgs_each_data)
			
 
				+
			
 
				+        # cname2cid and cid2cname
			
 
				+        cname2cid = {}
			
 
				+        if self.label_list is not None:
			
 
				+            # if use label_list for multi source mix dataset,
			
 
				+            # please make sure label_list in the first sub_dataset at least.
			
 
				+            sub_dataset = self.image_lists[0].split('.')[0]
			
 
				+            label_path = os.path.join(self.dataset_dir, sub_dataset,
			
 
				+                                      self.label_list)
			
 
				+            if not os.path.exists(label_path):
			
 
				+                logger.info(
			
 
				+                    "Note: label_list {} does not exists, use VisDrone 10 classes labels as default.".
			
 
				+                    format(label_path))
			
 
				+                cname2cid = visdrone_mcmot_label()
			
 
				+            else:
			
 
				+                with open(label_path, 'r') as fr:
			
 
				+                    label_id = 0
			
 
				+                    for line in fr.readlines():
			
 
				+                        cname2cid[line.strip()] = label_id
			
 
				+                        label_id += 1
			
 
				+        else:
			
 
				+            cname2cid = visdrone_mcmot_label()
			
 
				+
			
 
				+        cid2cname = dict([(v, k) for (k, v) in cname2cid.items()])
			
 
				+
			
 
				+        logger.info('MCMOT dataset summary: ')
			
 
				+        logger.info(self.tid_num)
			
 
				+        logger.info('Total images: {}'.format(self.total_imgs))
			
 
				+        logger.info('Image start index: {}'.format(self.img_start_index))
			
 
				+
			
 
				+        logger.info('Total identities of each category: ')
			
 
				+        num_identities_dict = sorted(
			
 
				+            self.num_identities_dict.items(), key=lambda x: x[0])
			
 
				+        total_IDs_all_cats = 0
			
 
				+        for (k, v) in num_identities_dict:
			
 
				+            logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
			
 
				+                                                              v))
			
 
				+            total_IDs_all_cats += v
			
 
				+        logger.info('Total identities of all categories: {}'.format(
			
 
				+            total_IDs_all_cats))
			
 
				+
			
 
				+        logger.info('Identity start index of each category: ')
			
 
				+        for k, v in self.tid_start_idx_of_cls_ids.items():
			
 
				+            sorted_v = sorted(v.items(), key=lambda x: x[0])
			
 
				+            for (cls_id, start_idx) in sorted_v:
			
 
				+                logger.info('Start index of dataset {} category {:d} is {:d}'
			
 
				+                            .format(k, cls_id, start_idx))
			
 
				+
			
 
				+        records = []
			
 
				+        for img_index in range(self.total_imgs):
			
 
				+            for i, (k, v) in enumerate(self.img_start_index.items()):
			
 
				+                if img_index >= v:
			
 
				+                    data_name = list(self.label_files.keys())[i]
			
 
				+                    start_index = v
			
 
				+            img_file = self.img_files[data_name][img_index - start_index]
			
 
				+            lbl_file = self.label_files[data_name][img_index - start_index]
			
 
				+
			
 
				+            if not os.path.exists(img_file):
			
 
				+                logger.warning(
			
 
				+                    'Illegal image file: {}, and it will be ignored'.format(
			
 
				+                        img_file))
			
 
				+                continue
			
 
				+            if not os.path.isfile(lbl_file):
			
 
				+                logger.warning(
			
 
				+                    'Illegal label file: {}, and it will be ignored'.format(
			
 
				+                        lbl_file))
			
 
				+                continue
			
 
				+
			
 
				+            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
			
 
				+            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
			
 
				+
			
 
				+            cx, cy = labels[:, 2], labels[:, 3]
			
 
				+            w, h = labels[:, 4], labels[:, 5]
			
 
				+            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
			
 
				+            gt_class = labels[:, 0:1].astype('int32')
			
 
				+            gt_score = np.ones((len(labels), 1)).astype('float32')
			
 
				+            gt_ide = labels[:, 1:2].astype('int32')
			
 
				+            for i, _ in enumerate(gt_ide):
			
 
				+                if gt_ide[i] > -1:
			
 
				+                    cls_id = int(gt_class[i])
			
 
				+                    start_idx = self.tid_start_idx_of_cls_ids[data_name][
			
 
				+                        cls_id]
			
 
				+                    gt_ide[i] += start_idx
			
 
				+
			
 
				+            mot_rec = {
			
 
				+                'im_file': img_file,
			
 
				+                'im_id': img_index,
			
 
				+            } if 'image' in self.data_fields else {}
			
 
				+
			
 
				+            gt_rec = {
			
 
				+                'gt_class': gt_class,
			
 
				+                'gt_score': gt_score,
			
 
				+                'gt_bbox': gt_bbox,
			
 
				+                'gt_ide': gt_ide,
			
 
				+            }
			
 
				+
			
 
				+            for k, v in gt_rec.items():
			
 
				+                if k in self.data_fields:
			
 
				+                    mot_rec[k] = v
			
 
				+
			
 
				+            records.append(mot_rec)
			
 
				+            if self.sample_num > 0 and img_index >= self.sample_num:
			
 
				+                break
			
 
				+        assert len(records) > 0, 'not found any mot record in %s' % (
			
 
				+            self.image_lists)
			
 
				+        self.roidbs, self.cname2cid = records, cname2cid
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class MOTImageFolder(DetDataset):
			
 
				+    """
			
 
				+    Load MOT dataset with MOT format from image folder or video .
			
 
				+    Args:
			
 
				+        video_file (str): path of the video file, default ''.
			
 
				+        frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        keep_ori_im (bool): whether to keep original image, default False.
			
 
				+            Set True when used during MOT model inference while saving
			
 
				+            images or video, or used in DeepSORT.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 video_file=None,
			
 
				+                 frame_rate=-1,
			
 
				+                 dataset_dir=None,
			
 
				+                 data_root=None,
			
 
				+                 image_dir=None,
			
 
				+                 sample_num=-1,
			
 
				+                 keep_ori_im=False,
			
 
				+                 **kwargs):
			
 
				+        super(MOTImageFolder, self).__init__(
			
 
				+            dataset_dir, image_dir, sample_num=sample_num)
			
 
				+        self.video_file = video_file
			
 
				+        self.data_root = data_root
			
 
				+        self.keep_ori_im = keep_ori_im
			
 
				+        self._imid2path = {}
			
 
				+        self.roidbs = None
			
 
				+        self.frame_rate = frame_rate
			
 
				+
			
 
				+    def check_or_download_dataset(self):
			
 
				+        return
			
 
				+
			
 
				+    def parse_dataset(self, ):
			
 
				+        if not self.roidbs:
			
 
				+            if self.video_file is None:
			
 
				+                self.frame_rate = 30  # set as default if infer image folder
			
 
				+                self.roidbs = self._load_images()
			
 
				+            else:
			
 
				+                self.roidbs = self._load_video_images()
			
 
				+
			
 
				+    def _load_video_images(self):
			
 
				+        if self.frame_rate == -1:
			
 
				+            # if frame_rate is not set for video, use cv2.VideoCapture
			
 
				+            cap = cv2.VideoCapture(self.video_file)
			
 
				+            self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
			
 
				+
			
 
				+        extension = self.video_file.split('.')[-1]
			
 
				+        output_path = self.video_file.replace('.{}'.format(extension), '')
			
 
				+        frames_path = video2frames(self.video_file, output_path,
			
 
				+                                   self.frame_rate)
			
 
				+        self.video_frames = sorted(
			
 
				+            glob.glob(os.path.join(frames_path, '*.png')))
			
 
				+
			
 
				+        self.video_length = len(self.video_frames)
			
 
				+        logger.info('Length of the video: {:d} frames.'.format(
			
 
				+            self.video_length))
			
 
				+        ct = 0
			
 
				+        records = []
			
 
				+        for image in self.video_frames:
			
 
				+            assert image != '' and os.path.isfile(image), \
			
 
				+                    "Image {} not found".format(image)
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+            rec = {'im_id': np.array([ct]), 'im_file': image}
			
 
				+            if self.keep_ori_im:
			
 
				+                rec.update({'keep_ori_im': 1})
			
 
				+            self._imid2path[ct] = image
			
 
				+            ct += 1
			
 
				+            records.append(rec)
			
 
				+        assert len(records) > 0, "No image file found"
			
 
				+        return records
			
 
				+
			
 
				+    def _find_images(self):
			
 
				+        image_dir = self.image_dir
			
 
				+        if not isinstance(image_dir, Sequence):
			
 
				+            image_dir = [image_dir]
			
 
				+        images = []
			
 
				+        for im_dir in image_dir:
			
 
				+            if os.path.isdir(im_dir):
			
 
				+                im_dir = os.path.join(self.dataset_dir, im_dir)
			
 
				+                images.extend(_make_dataset(im_dir))
			
 
				+            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
			
 
				+                images.append(im_dir)
			
 
				+        return images
			
 
				+
			
 
				+    def _load_images(self):
			
 
				+        images = self._find_images()
			
 
				+        ct = 0
			
 
				+        records = []
			
 
				+        for image in images:
			
 
				+            assert image != '' and os.path.isfile(image), \
			
 
				+                    "Image {} not found".format(image)
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+            rec = {'im_id': np.array([ct]), 'im_file': image}
			
 
				+            if self.keep_ori_im:
			
 
				+                rec.update({'keep_ori_im': 1})
			
 
				+            self._imid2path[ct] = image
			
 
				+            ct += 1
			
 
				+            records.append(rec)
			
 
				+        assert len(records) > 0, "No image file found"
			
 
				+        return records
			
 
				+
			
 
				+    def get_imid2path(self):
			
 
				+        return self._imid2path
			
 
				+
			
 
				+    def set_images(self, images):
			
 
				+        self.image_dir = images
			
 
				+        self.roidbs = self._load_images()
			
 
				+
			
 
				+    def set_video(self, video_file, frame_rate):
			
 
				+        # update video_file and frame_rate by command line of tools/infer_mot.py
			
 
				+        self.video_file = video_file
			
 
				+        self.frame_rate = frame_rate
			
 
				+        assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
			
 
				+                "wrong or unsupported file format: {}".format(self.video_file)
			
 
				+        self.roidbs = self._load_video_images()
			
 
				+
			
 
				+
			
 
				+def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
			
 
				+    return f.lower().endswith(extensions)
			
 
				+
			
 
				+
			
 
				+def video2frames(video_path, outpath, frame_rate, **kargs):
			
 
				+    def _dict2str(kargs):
			
 
				+        cmd_str = ''
			
 
				+        for k, v in kargs.items():
			
 
				+            cmd_str += (' ' + str(k) + ' ' + str(v))
			
 
				+        return cmd_str
			
 
				+
			
 
				+    ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
			
 
				+    vid_name = os.path.basename(video_path).split('.')[0]
			
 
				+    out_full_path = os.path.join(outpath, vid_name)
			
 
				+
			
 
				+    if not os.path.exists(out_full_path):
			
 
				+        os.makedirs(out_full_path)
			
 
				+
			
 
				+    # video file name
			
 
				+    outformat = os.path.join(out_full_path, '%08d.png')
			
 
				+
			
 
				+    cmd = ffmpeg
			
 
				+    cmd = ffmpeg + [
			
 
				+        ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
			
 
				+    ]
			
 
				+    cmd = ''.join(cmd) + _dict2str(kargs)
			
 
				+
			
 
				+    if os.system(cmd) != 0:
			
 
				+        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
			
 
				+        sys.exit(-1)
			
 
				+
			
 
				+    sys.stdout.flush()
			
 
				+    return out_full_path
			
 
				+
			
 
				+
			
 
				+def mot_label():
			
 
				+    labels_map = {'person': 0}
			
 
				+    return labels_map
			
 
				+
			
 
				+
			
 
				+def visdrone_mcmot_label():
			
 
				+    labels_map = {
			
 
				+        'pedestrian': 0,
			
 
				+        'people': 1,
			
 
				+        'bicycle': 2,
			
 
				+        'car': 3,
			
 
				+        'van': 4,
			
 
				+        'truck': 5,
			
 
				+        'tricycle': 6,
			
 
				+        'awning-tricycle': 7,
			
 
				+        'bus': 8,
			
 
				+        'motor': 9,
			
 
				+    }
			
 
				+    return labels_map
			
--- a/paddlers/models/ppdet/data/source/sniper_coco.py
+++ b/paddlers/models/ppdet/data/source/sniper_coco.py
@@ -0,0 +1,191 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import cv2
			
 
				+import json
			
 
				+import copy
			
 
				+import numpy as np
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.data.crop_utils.annotation_cropper import AnnoCropper
			
 
				+from .coco import COCODataSet
			
 
				+from .dataset import _make_dataset, _is_valid_file
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+
			
 
				+logger = setup_logger('sniper_coco_dataset')
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class SniperCOCODataSet(COCODataSet):
			
 
				+    """SniperCOCODataSet"""
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 proposals_file=None,
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1,
			
 
				+                 load_crowd=False,
			
 
				+                 allow_empty=True,
			
 
				+                 empty_ratio=1.,
			
 
				+                 is_trainset=True,
			
 
				+                 image_target_sizes=[2000, 1000],
			
 
				+                 valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
			
 
				+                 chip_target_size=500,
			
 
				+                 chip_target_stride=200,
			
 
				+                 use_neg_chip=False,
			
 
				+                 max_neg_num_per_im=8,
			
 
				+                 max_per_img=-1,
			
 
				+                 nms_thresh=0.5):
			
 
				+        super(SniperCOCODataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            image_dir=image_dir,
			
 
				+            anno_path=anno_path,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num,
			
 
				+            load_crowd=load_crowd,
			
 
				+            allow_empty=allow_empty,
			
 
				+            empty_ratio=empty_ratio)
			
 
				+        self.proposals_file = proposals_file
			
 
				+        self.proposals = None
			
 
				+        self.anno_cropper = None
			
 
				+        self.is_trainset = is_trainset
			
 
				+        self.image_target_sizes = image_target_sizes
			
 
				+        self.valid_box_ratio_ranges = valid_box_ratio_ranges
			
 
				+        self.chip_target_size = chip_target_size
			
 
				+        self.chip_target_stride = chip_target_stride
			
 
				+        self.use_neg_chip = use_neg_chip
			
 
				+        self.max_neg_num_per_im = max_neg_num_per_im
			
 
				+        self.max_per_img = max_per_img
			
 
				+        self.nms_thresh = nms_thresh
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        if not hasattr(self, "roidbs"):
			
 
				+            super(SniperCOCODataSet, self).parse_dataset()
			
 
				+        if self.is_trainset:
			
 
				+            self._parse_proposals()
			
 
				+            self._merge_anno_proposals()
			
 
				+        self.ori_roidbs = copy.deepcopy(self.roidbs)
			
 
				+        self.init_anno_cropper()
			
 
				+        self.roidbs = self.generate_chips_roidbs(self.roidbs, self.is_trainset)
			
 
				+
			
 
				+    def set_proposals_file(self, file_path):
			
 
				+        self.proposals_file = file_path
			
 
				+
			
 
				+    def init_anno_cropper(self):
			
 
				+        logger.info("Init AnnoCropper...")
			
 
				+        self.anno_cropper = AnnoCropper(
			
 
				+            image_target_sizes=self.image_target_sizes,
			
 
				+            valid_box_ratio_ranges=self.valid_box_ratio_ranges,
			
 
				+            chip_target_size=self.chip_target_size,
			
 
				+            chip_target_stride=self.chip_target_stride,
			
 
				+            use_neg_chip=self.use_neg_chip,
			
 
				+            max_neg_num_per_im=self.max_neg_num_per_im,
			
 
				+            max_per_img=self.max_per_img,
			
 
				+            nms_thresh=self.nms_thresh)
			
 
				+
			
 
				+    def generate_chips_roidbs(self, roidbs, is_trainset):
			
 
				+        if is_trainset:
			
 
				+            roidbs = self.anno_cropper.crop_anno_records(roidbs)
			
 
				+        else:
			
 
				+            roidbs = self.anno_cropper.crop_infer_anno_records(roidbs)
			
 
				+        return roidbs
			
 
				+
			
 
				+    def _parse_proposals(self):
			
 
				+        if self.proposals_file:
			
 
				+            self.proposals = {}
			
 
				+            logger.info("Parse proposals file:{}".format(self.proposals_file))
			
 
				+            with open(self.proposals_file, 'r') as f:
			
 
				+                proposals = json.load(f)
			
 
				+            for prop in proposals:
			
 
				+                image_id = prop["image_id"]
			
 
				+                if image_id not in self.proposals:
			
 
				+                    self.proposals[image_id] = []
			
 
				+                x, y, w, h = prop["bbox"]
			
 
				+                self.proposals[image_id].append([x, y, x + w, y + h])
			
 
				+
			
 
				+    def _merge_anno_proposals(self):
			
 
				+        assert self.roidbs
			
 
				+        if self.proposals and len(self.proposals.keys()) > 0:
			
 
				+            logger.info("merge proposals to annos")
			
 
				+            for id, record in enumerate(self.roidbs):
			
 
				+                image_id = int(record["im_id"])
			
 
				+                if image_id not in self.proposals.keys():
			
 
				+                    logger.info("image id :{} no proposals".format(image_id))
			
 
				+                record["proposals"] = np.array(
			
 
				+                    self.proposals.get(image_id, []), dtype=np.float32)
			
 
				+                self.roidbs[id] = record
			
 
				+
			
 
				+    def get_ori_roidbs(self):
			
 
				+        if not hasattr(self, "ori_roidbs"):
			
 
				+            return None
			
 
				+        return self.ori_roidbs
			
 
				+
			
 
				+    def get_roidbs(self):
			
 
				+        if not hasattr(self, "roidbs"):
			
 
				+            self.parse_dataset()
			
 
				+        return self.roidbs
			
 
				+
			
 
				+    def set_roidbs(self, roidbs):
			
 
				+        self.roidbs = roidbs
			
 
				+
			
 
				+    def check_or_download_dataset(self):
			
 
				+        return
			
 
				+
			
 
				+    def _parse(self):
			
 
				+        image_dir = self.image_dir
			
 
				+        if not isinstance(image_dir, Sequence):
			
 
				+            image_dir = [image_dir]
			
 
				+        images = []
			
 
				+        for im_dir in image_dir:
			
 
				+            if os.path.isdir(im_dir):
			
 
				+                im_dir = os.path.join(self.dataset_dir, im_dir)
			
 
				+                images.extend(_make_dataset(im_dir))
			
 
				+            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
			
 
				+                images.append(im_dir)
			
 
				+        return images
			
 
				+
			
 
				+    def _load_images(self):
			
 
				+        images = self._parse()
			
 
				+        ct = 0
			
 
				+        records = []
			
 
				+        for image in images:
			
 
				+            assert image != '' and os.path.isfile(image), \
			
 
				+                "Image {} not found".format(image)
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+            im = cv2.imread(image)
			
 
				+            h, w, c = im.shape
			
 
				+            rec = {'im_id': np.array([ct]), 'im_file': image, "h": h, "w": w}
			
 
				+            self._imid2path[ct] = image
			
 
				+            ct += 1
			
 
				+            records.append(rec)
			
 
				+        assert len(records) > 0, "No image file found"
			
 
				+        return records
			
 
				+
			
 
				+    def get_imid2path(self):
			
 
				+        return self._imid2path
			
 
				+
			
 
				+    def set_images(self, images):
			
 
				+        self._imid2path = {}
			
 
				+        self.image_dir = images
			
 
				+        self.roidbs = self._load_images()
			
--- a/paddlers/models/ppdet/data/source/voc.py
+++ b/paddlers/models/ppdet/data/source/voc.py
@@ -0,0 +1,231 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import numpy as np
			
 
				+
			
 
				+import xml.etree.ElementTree as ET
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+
			
 
				+from .dataset import DetDataset
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class VOCDataSet(DetDataset):
			
 
				+    """
			
 
				+    Load dataset with PascalVOC format.
			
 
				+
			
 
				+    Notes:
			
 
				+    `anno_path` must contains xml file and image file path for annotations.
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_dir (str): directory for images.
			
 
				+        anno_path (str): voc annotation file path.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+        label_list (str): if use_default_label is False, will load
			
 
				+            mapping between category and class index.
			
 
				+        allow_empty (bool): whether to load empty entry. False as default
			
 
				+        empty_ratio (float): the ratio of empty record number to total
			
 
				+            record's, if empty_ratio is out of [0. ,1.), do not sample the
			
 
				+            records and use all the empty entries. 1. as default
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1,
			
 
				+                 label_list=None,
			
 
				+                 allow_empty=False,
			
 
				+                 empty_ratio=1.):
			
 
				+        super(VOCDataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            image_dir=image_dir,
			
 
				+            anno_path=anno_path,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num)
			
 
				+        self.label_list = label_list
			
 
				+        self.allow_empty = allow_empty
			
 
				+        self.empty_ratio = empty_ratio
			
 
				+
			
 
				+    def _sample_empty(self, records, num):
			
 
				+        # if empty_ratio is out of [0. ,1.), do not sample the records
			
 
				+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
			
 
				+            return records
			
 
				+        import random
			
 
				+        sample_num = min(
			
 
				+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
			
 
				+        records = random.sample(records, sample_num)
			
 
				+        return records
			
 
				+
			
 
				+    def parse_dataset(self, ):
			
 
				+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
			
 
				+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
			
 
				+
			
 
				+        # mapping category name to class id
			
 
				+        # first_class:0, second_class:1, ...
			
 
				+        records = []
			
 
				+        empty_records = []
			
 
				+        ct = 0
			
 
				+        cname2cid = {}
			
 
				+        if self.label_list:
			
 
				+            label_path = os.path.join(self.dataset_dir, self.label_list)
			
 
				+            if not os.path.exists(label_path):
			
 
				+                raise ValueError("label_list {} does not exists".format(
			
 
				+                    label_path))
			
 
				+            with open(label_path, 'r') as fr:
			
 
				+                label_id = 0
			
 
				+                for line in fr.readlines():
			
 
				+                    cname2cid[line.strip()] = label_id
			
 
				+                    label_id += 1
			
 
				+        else:
			
 
				+            cname2cid = pascalvoc_label()
			
 
				+
			
 
				+        with open(anno_path, 'r') as fr:
			
 
				+            while True:
			
 
				+                line = fr.readline()
			
 
				+                if not line:
			
 
				+                    break
			
 
				+                img_file, xml_file = [os.path.join(image_dir, x) \
			
 
				+                        for x in line.strip().split()[:2]]
			
 
				+                if not os.path.exists(img_file):
			
 
				+                    logger.warning(
			
 
				+                        'Illegal image file: {}, and it will be ignored'.
			
 
				+                        format(img_file))
			
 
				+                    continue
			
 
				+                if not os.path.isfile(xml_file):
			
 
				+                    logger.warning(
			
 
				+                        'Illegal xml file: {}, and it will be ignored'.format(
			
 
				+                            xml_file))
			
 
				+                    continue
			
 
				+                tree = ET.parse(xml_file)
			
 
				+                if tree.find('id') is None:
			
 
				+                    im_id = np.array([ct])
			
 
				+                else:
			
 
				+                    im_id = np.array([int(tree.find('id').text)])
			
 
				+
			
 
				+                objs = tree.findall('object')
			
 
				+                im_w = float(tree.find('size').find('width').text)
			
 
				+                im_h = float(tree.find('size').find('height').text)
			
 
				+                if im_w < 0 or im_h < 0:
			
 
				+                    logger.warning(
			
 
				+                        'Illegal width: {} or height: {} in annotation, '
			
 
				+                        'and {} will be ignored'.format(im_w, im_h, xml_file))
			
 
				+                    continue
			
 
				+
			
 
				+                num_bbox, i = len(objs), 0
			
 
				+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
			
 
				+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				+                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
			
 
				+                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
			
 
				+                for obj in objs:
			
 
				+                    cname = obj.find('name').text
			
 
				+
			
 
				+                    # user dataset may not contain difficult field
			
 
				+                    _difficult = obj.find('difficult')
			
 
				+                    _difficult = int(
			
 
				+                        _difficult.text) if _difficult is not None else 0
			
 
				+
			
 
				+                    x1 = float(obj.find('bndbox').find('xmin').text)
			
 
				+                    y1 = float(obj.find('bndbox').find('ymin').text)
			
 
				+                    x2 = float(obj.find('bndbox').find('xmax').text)
			
 
				+                    y2 = float(obj.find('bndbox').find('ymax').text)
			
 
				+                    x1 = max(0, x1)
			
 
				+                    y1 = max(0, y1)
			
 
				+                    x2 = min(im_w - 1, x2)
			
 
				+                    y2 = min(im_h - 1, y2)
			
 
				+                    if x2 > x1 and y2 > y1:
			
 
				+                        gt_bbox[i, :] = [x1, y1, x2, y2]
			
 
				+                        gt_class[i, 0] = cname2cid[cname]
			
 
				+                        gt_score[i, 0] = 1.
			
 
				+                        difficult[i, 0] = _difficult
			
 
				+                        i += 1
			
 
				+                    else:
			
 
				+                        logger.warning(
			
 
				+                            'Found an invalid bbox in annotations: xml_file: {}'
			
 
				+                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
			
 
				+                                xml_file, x1, y1, x2, y2))
			
 
				+                gt_bbox = gt_bbox[:i, :]
			
 
				+                gt_class = gt_class[:i, :]
			
 
				+                gt_score = gt_score[:i, :]
			
 
				+                difficult = difficult[:i, :]
			
 
				+
			
 
				+                voc_rec = {
			
 
				+                    'im_file': img_file,
			
 
				+                    'im_id': im_id,
			
 
				+                    'h': im_h,
			
 
				+                    'w': im_w
			
 
				+                } if 'image' in self.data_fields else {}
			
 
				+
			
 
				+                gt_rec = {
			
 
				+                    'gt_class': gt_class,
			
 
				+                    'gt_score': gt_score,
			
 
				+                    'gt_bbox': gt_bbox,
			
 
				+                    'difficult': difficult
			
 
				+                }
			
 
				+                for k, v in gt_rec.items():
			
 
				+                    if k in self.data_fields:
			
 
				+                        voc_rec[k] = v
			
 
				+
			
 
				+                if len(objs) == 0:
			
 
				+                    empty_records.append(voc_rec)
			
 
				+                else:
			
 
				+                    records.append(voc_rec)
			
 
				+
			
 
				+                ct += 1
			
 
				+                if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                    break
			
 
				+        assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
			
 
				+        logger.debug('{} samples in file {}'.format(ct, anno_path))
			
 
				+        if self.allow_empty and len(empty_records) > 0:
			
 
				+            empty_records = self._sample_empty(empty_records, len(records))
			
 
				+            records += empty_records
			
 
				+        self.roidbs, self.cname2cid = records, cname2cid
			
 
				+
			
 
				+    def get_label_list(self):
			
 
				+        return os.path.join(self.dataset_dir, self.label_list)
			
 
				+
			
 
				+
			
 
				+def pascalvoc_label():
			
 
				+    labels_map = {
			
 
				+        'aeroplane': 0,
			
 
				+        'bicycle': 1,
			
 
				+        'bird': 2,
			
 
				+        'boat': 3,
			
 
				+        'bottle': 4,
			
 
				+        'bus': 5,
			
 
				+        'car': 6,
			
 
				+        'cat': 7,
			
 
				+        'chair': 8,
			
 
				+        'cow': 9,
			
 
				+        'diningtable': 10,
			
 
				+        'dog': 11,
			
 
				+        'horse': 12,
			
 
				+        'motorbike': 13,
			
 
				+        'person': 14,
			
 
				+        'pottedplant': 15,
			
 
				+        'sheep': 16,
			
 
				+        'sofa': 17,
			
 
				+        'train': 18,
			
 
				+        'tvmonitor': 19
			
 
				+    }
			
 
				+    return labels_map
			
--- a/paddlers/models/ppdet/data/source/widerface.py
+++ b/paddlers/models/ppdet/data/source/widerface.py
@@ -0,0 +1,180 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import numpy as np
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from .dataset import DetDataset
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class WIDERFaceDataSet(DetDataset):
			
 
				+    """
			
 
				+    Load WiderFace records with 'anno_path'
			
 
				+
			
 
				+    Args:
			
 
				+        dataset_dir (str): root directory for dataset.
			
 
				+        image_dir (str): directory for images.
			
 
				+        anno_path (str): WiderFace annotation data.
			
 
				+        data_fields (list): key name of data dictionary, at least have 'image'.
			
 
				+        sample_num (int): number of samples to load, -1 means all.
			
 
				+        with_lmk (bool): whether to load face landmark keypoint labels.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 dataset_dir=None,
			
 
				+                 image_dir=None,
			
 
				+                 anno_path=None,
			
 
				+                 data_fields=['image'],
			
 
				+                 sample_num=-1,
			
 
				+                 with_lmk=False):
			
 
				+        super(WIDERFaceDataSet, self).__init__(
			
 
				+            dataset_dir=dataset_dir,
			
 
				+            image_dir=image_dir,
			
 
				+            anno_path=anno_path,
			
 
				+            data_fields=data_fields,
			
 
				+            sample_num=sample_num,
			
 
				+            with_lmk=with_lmk)
			
 
				+        self.anno_path = anno_path
			
 
				+        self.sample_num = sample_num
			
 
				+        self.roidbs = None
			
 
				+        self.cname2cid = None
			
 
				+        self.with_lmk = with_lmk
			
 
				+
			
 
				+    def parse_dataset(self):
			
 
				+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
			
 
				+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
			
 
				+
			
 
				+        txt_file = anno_path
			
 
				+
			
 
				+        records = []
			
 
				+        ct = 0
			
 
				+        file_lists = self._load_file_list(txt_file)
			
 
				+        cname2cid = widerface_label()
			
 
				+
			
 
				+        for item in file_lists:
			
 
				+            im_fname = item[0]
			
 
				+            im_id = np.array([ct])
			
 
				+            gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
			
 
				+            gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
			
 
				+            gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
			
 
				+            lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
			
 
				+            for index_box in range(len(item)):
			
 
				+                if index_box < 1:
			
 
				+                    continue
			
 
				+                gt_bbox[index_box - 1] = item[index_box][0]
			
 
				+                if self.with_lmk:
			
 
				+                    gt_lmk_labels[index_box - 1] = item[index_box][1]
			
 
				+                    lmk_ignore_flag[index_box - 1] = item[index_box][2]
			
 
				+            im_fname = os.path.join(image_dir,
			
 
				+                                    im_fname) if image_dir else im_fname
			
 
				+            widerface_rec = {
			
 
				+                'im_file': im_fname,
			
 
				+                'im_id': im_id,
			
 
				+            } if 'image' in self.data_fields else {}
			
 
				+            gt_rec = {
			
 
				+                'gt_bbox': gt_bbox,
			
 
				+                'gt_class': gt_class,
			
 
				+            }
			
 
				+            for k, v in gt_rec.items():
			
 
				+                if k in self.data_fields:
			
 
				+                    widerface_rec[k] = v
			
 
				+            if self.with_lmk:
			
 
				+                widerface_rec['gt_keypoint'] = gt_lmk_labels
			
 
				+                widerface_rec['keypoint_ignore'] = lmk_ignore_flag
			
 
				+
			
 
				+            if len(item) != 0:
			
 
				+                records.append(widerface_rec)
			
 
				+
			
 
				+            ct += 1
			
 
				+            if self.sample_num > 0 and ct >= self.sample_num:
			
 
				+                break
			
 
				+        assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
			
 
				+        logger.debug('{} samples in file {}'.format(ct, anno_path))
			
 
				+        self.roidbs, self.cname2cid = records, cname2cid
			
 
				+
			
 
				+    def _load_file_list(self, input_txt):
			
 
				+        with open(input_txt, 'r') as f_dir:
			
 
				+            lines_input_txt = f_dir.readlines()
			
 
				+
			
 
				+        file_dict = {}
			
 
				+        num_class = 0
			
 
				+        exts = ['jpg', 'jpeg', 'png', 'bmp']
			
 
				+        exts += [ext.upper() for ext in exts]
			
 
				+        for i in range(len(lines_input_txt)):
			
 
				+            line_txt = lines_input_txt[i].strip('\n\t\r')
			
 
				+            split_str = line_txt.split(' ')
			
 
				+            if len(split_str) == 1:
			
 
				+                img_file_name = os.path.split(split_str[0])[1]
			
 
				+                split_txt = img_file_name.split('.')
			
 
				+                if len(split_txt) < 2:
			
 
				+                    continue
			
 
				+                elif split_txt[-1] in exts:
			
 
				+                    if i != 0:
			
 
				+                        num_class += 1
			
 
				+                    file_dict[num_class] = [line_txt]
			
 
				+            else:
			
 
				+                if len(line_txt) <= 6:
			
 
				+                    continue
			
 
				+                result_boxs = []
			
 
				+                xmin = float(split_str[0])
			
 
				+                ymin = float(split_str[1])
			
 
				+                w = float(split_str[2])
			
 
				+                h = float(split_str[3])
			
 
				+                # Filter out wrong labels
			
 
				+                if w < 0 or h < 0:
			
 
				+                    logger.warning('Illegal box with w: {}, h: {} in '
			
 
				+                                   'img: {}, and it will be ignored'.format(
			
 
				+                                       w, h, file_dict[num_class][0]))
			
 
				+                    continue
			
 
				+                xmin = max(0, xmin)
			
 
				+                ymin = max(0, ymin)
			
 
				+                xmax = xmin + w
			
 
				+                ymax = ymin + h
			
 
				+                gt_bbox = [xmin, ymin, xmax, ymax]
			
 
				+                result_boxs.append(gt_bbox)
			
 
				+                if self.with_lmk:
			
 
				+                    assert len(split_str) > 18, 'When `with_lmk=True`, the number' \
			
 
				+                            'of characters per line in the annotation file should' \
			
 
				+                            'exceed 18.'
			
 
				+                    lmk0_x = float(split_str[5])
			
 
				+                    lmk0_y = float(split_str[6])
			
 
				+                    lmk1_x = float(split_str[8])
			
 
				+                    lmk1_y = float(split_str[9])
			
 
				+                    lmk2_x = float(split_str[11])
			
 
				+                    lmk2_y = float(split_str[12])
			
 
				+                    lmk3_x = float(split_str[14])
			
 
				+                    lmk3_y = float(split_str[15])
			
 
				+                    lmk4_x = float(split_str[17])
			
 
				+                    lmk4_y = float(split_str[18])
			
 
				+                    lmk_ignore_flag = 0 if lmk0_x == -1 else 1
			
 
				+                    gt_lmk_label = [
			
 
				+                        lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
			
 
				+                        lmk3_y, lmk4_x, lmk4_y
			
 
				+                    ]
			
 
				+                    result_boxs.append(gt_lmk_label)
			
 
				+                    result_boxs.append(lmk_ignore_flag)
			
 
				+                file_dict[num_class].append(result_boxs)
			
 
				+
			
 
				+        return list(file_dict.values())
			
 
				+
			
 
				+
			
 
				+def widerface_label():
			
 
				+    labels_map = {'face': 0}
			
 
				+    return labels_map
			
--- a/paddlers/models/ppdet/data/transform/__init__.py
+++ b/paddlers/models/ppdet/data/transform/__init__.py
@@ -0,0 +1,28 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import operators
			
 
				+from . import batch_operators
			
 
				+from . import keypoint_operators
			
 
				+from . import mot_operators
			
 
				+
			
 
				+from .operators import *
			
 
				+from .batch_operators import *
			
 
				+from .keypoint_operators import *
			
 
				+from .mot_operators import *
			
 
				+
			
 
				+__all__ = []
			
 
				+__all__ += registered_ops
			
 
				+__all__ += keypoint_operators.__all__
			
 
				+__all__ += mot_operators.__all__
			
--- a/paddlers/models/ppdet/data/transform/atss_assigner.py
+++ b/paddlers/models/ppdet/data/transform/atss_assigner.py
@@ -0,0 +1,270 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+# The code is based on:
			
 
				+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import numpy as np
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
			
 
				+    """Calculate overlap between two set of bboxes.
			
 
				+    If ``is_aligned `` is ``False``, then calculate the overlaps between each
			
 
				+    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
			
 
				+    pair of bboxes1 and bboxes2.
			
 
				+    Args:
			
 
				+        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
			
 
				+        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
			
 
				+            B indicates the batch dim, in shape (B1, B2, ..., Bn).
			
 
				+            If ``is_aligned `` is ``True``, then m and n must be equal.
			
 
				+        mode (str): "iou" (intersection over union) or "iof" (intersection over
			
 
				+            foreground).
			
 
				+        is_aligned (bool, optional): If True, then m and n must be equal.
			
 
				+            Default False.
			
 
				+        eps (float, optional): A value added to the denominator for numerical
			
 
				+            stability. Default 1e-6.
			
 
				+    Returns:
			
 
				+        Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
			
 
				+    """
			
 
				+    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
			
 
				+    # Either the boxes are empty or the length of boxes's last dimenstion is 4
			
 
				+    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
			
 
				+    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
			
 
				+
			
 
				+    # Batch dim must be the same
			
 
				+    # Batch dim: (B1, B2, ... Bn)
			
 
				+    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
			
 
				+    batch_shape = bboxes1.shape[:-2]
			
 
				+
			
 
				+    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
			
 
				+    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
			
 
				+    if is_aligned:
			
 
				+        assert rows == cols
			
 
				+
			
 
				+    if rows * cols == 0:
			
 
				+        if is_aligned:
			
 
				+            return np.random.random(batch_shape + (rows, ))
			
 
				+        else:
			
 
				+            return np.random.random(batch_shape + (rows, cols))
			
 
				+
			
 
				+    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
			
 
				+        bboxes1[..., 3] - bboxes1[..., 1])
			
 
				+    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
			
 
				+        bboxes2[..., 3] - bboxes2[..., 1])
			
 
				+
			
 
				+    if is_aligned:
			
 
				+        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
			
 
				+        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]
			
 
				+
			
 
				+        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
			
 
				+        overlap = wh[..., 0] * wh[..., 1]
			
 
				+
			
 
				+        if mode in ['iou', 'giou']:
			
 
				+            union = area1 + area2 - overlap
			
 
				+        else:
			
 
				+            union = area1
			
 
				+        if mode == 'giou':
			
 
				+            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
			
 
				+            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
			
 
				+    else:
			
 
				+        lt = np.maximum(bboxes1[..., :, None, :2],
			
 
				+                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
			
 
				+        rb = np.minimum(bboxes1[..., :, None, 2:],
			
 
				+                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]
			
 
				+
			
 
				+        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
			
 
				+        overlap = wh[..., 0] * wh[..., 1]
			
 
				+
			
 
				+        if mode in ['iou', 'giou']:
			
 
				+            union = area1[..., None] + area2[..., None, :] - overlap
			
 
				+        else:
			
 
				+            union = area1[..., None]
			
 
				+        if mode == 'giou':
			
 
				+            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
			
 
				+                                     bboxes2[..., None, :, :2])
			
 
				+            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
			
 
				+                                     bboxes2[..., None, :, 2:])
			
 
				+
			
 
				+    eps = np.array([eps])
			
 
				+    union = np.maximum(union, eps)
			
 
				+    ious = overlap / union
			
 
				+    if mode in ['iou', 'iof']:
			
 
				+        return ious
			
 
				+    # calculate gious
			
 
				+    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
			
 
				+    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
			
 
				+    enclose_area = np.maximum(enclose_area, eps)
			
 
				+    gious = ious - (enclose_area - union) / enclose_area
			
 
				+    return gious
			
 
				+
			
 
				+
			
 
				+def topk_(input, k, axis=1, largest=True):
			
 
				+    x = -input if largest else input
			
 
				+    if axis == 0:
			
 
				+        row_index = np.arange(input.shape[1 - axis])
			
 
				+        topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
			
 
				+        topk_data = x[topk_index, row_index]
			
 
				+
			
 
				+        topk_index_sort = np.argsort(topk_data, axis=axis)
			
 
				+        topk_data_sort = topk_data[topk_index_sort, row_index]
			
 
				+        topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
			
 
				+    else:
			
 
				+        column_index = np.arange(x.shape[1 - axis])[:, None]
			
 
				+        topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
			
 
				+        topk_data = x[column_index, topk_index]
			
 
				+        topk_data = -topk_data if largest else topk_data
			
 
				+        topk_index_sort = np.argsort(topk_data, axis=axis)
			
 
				+        topk_data_sort = topk_data[column_index, topk_index_sort]
			
 
				+        topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
			
 
				+
			
 
				+    return topk_data_sort, topk_index_sort
			
 
				+
			
 
				+
			
 
				+class ATSSAssigner(object):
			
 
				+    """Assign a corresponding gt bbox or background to each bbox.
			
 
				+
			
 
				+    Each proposals will be assigned with `0` or a positive integer
			
 
				+    indicating the ground truth index.
			
 
				+
			
 
				+    - 0: negative sample, no assigned gt
			
 
				+    - positive integer: positive sample, index (1-based) of assigned gt
			
 
				+
			
 
				+    Args:
			
 
				+        topk (float): number of bbox selected in each level
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, topk=9):
			
 
				+        self.topk = topk
			
 
				+
			
 
				+    def __call__(self,
			
 
				+                 bboxes,
			
 
				+                 num_level_bboxes,
			
 
				+                 gt_bboxes,
			
 
				+                 gt_bboxes_ignore=None,
			
 
				+                 gt_labels=None):
			
 
				+        """Assign gt to bboxes.
			
 
				+        The assignment is done in following steps
			
 
				+        1. compute iou between all bbox (bbox of all pyramid levels) and gt
			
 
				+        2. compute center distance between all bbox and gt
			
 
				+        3. on each pyramid level, for each gt, select k bbox whose center
			
 
				+           are closest to the gt center, so we total select k*l bbox as
			
 
				+           candidates for each gt
			
 
				+        4. get corresponding iou for the these candidates, and compute the
			
 
				+           mean and std, set mean + std as the iou threshold
			
 
				+        5. select these candidates whose iou are greater than or equal to
			
 
				+           the threshold as postive
			
 
				+        6. limit the positive sample's center in gt
			
 
				+        Args:
			
 
				+            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
			
 
				+            num_level_bboxes (List): num of bboxes in each level
			
 
				+            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
			
 
				+            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
			
 
				+                labelled as `ignored`, e.g., crowd boxes in COCO.
			
 
				+            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
			
 
				+        """
			
 
				+        bboxes = bboxes[:, :4]
			
 
				+        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
			
 
				+
			
 
				+        # assign 0 by default
			
 
				+        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
			
 
				+
			
 
				+        if num_gt == 0 or num_bboxes == 0:
			
 
				+            # No ground truth or boxes, return empty assignment
			
 
				+            max_overlaps = np.zeros((num_bboxes, ))
			
 
				+            if num_gt == 0:
			
 
				+                # No truth, assign everything to background
			
 
				+                assigned_gt_inds[:] = 0
			
 
				+            if not np.any(gt_labels):
			
 
				+                assigned_labels = None
			
 
				+            else:
			
 
				+                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
			
 
				+            return assigned_gt_inds, max_overlaps
			
 
				+
			
 
				+        # compute iou between all bbox and gt
			
 
				+        overlaps = bbox_overlaps(bboxes, gt_bboxes)
			
 
				+        # compute center distance between all bbox and gt
			
 
				+        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
			
 
				+        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
			
 
				+        gt_points = np.stack((gt_cx, gt_cy), axis=1)
			
 
				+
			
 
				+        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
			
 
				+        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
			
 
				+        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
			
 
				+
			
 
				+        distances = np.sqrt(
			
 
				+            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
			
 
				+            .sum(-1))
			
 
				+
			
 
				+        # Selecting candidates based on the center distance
			
 
				+        candidate_idxs = []
			
 
				+        start_idx = 0
			
 
				+        for bboxes_per_level in num_level_bboxes:
			
 
				+            # on each pyramid level, for each gt,
			
 
				+            # select k bbox whose center are closest to the gt center
			
 
				+            end_idx = start_idx + bboxes_per_level
			
 
				+            distances_per_level = distances[start_idx:end_idx, :]
			
 
				+            selectable_k = min(self.topk, bboxes_per_level)
			
 
				+            _, topk_idxs_per_level = topk_(
			
 
				+                distances_per_level, selectable_k, axis=0, largest=False)
			
 
				+            candidate_idxs.append(topk_idxs_per_level + start_idx)
			
 
				+            start_idx = end_idx
			
 
				+        candidate_idxs = np.concatenate(candidate_idxs, axis=0)
			
 
				+
			
 
				+        # get corresponding iou for the these candidates, and compute the
			
 
				+        # mean and std, set mean + std as the iou threshold
			
 
				+        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
			
 
				+        overlaps_mean_per_gt = candidate_overlaps.mean(0)
			
 
				+        overlaps_std_per_gt = candidate_overlaps.std(0)
			
 
				+        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
			
 
				+
			
 
				+        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
			
 
				+
			
 
				+        # limit the positive sample's center in gt
			
 
				+        for gt_idx in range(num_gt):
			
 
				+            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
			
 
				+        ep_bboxes_cx = np.broadcast_to(
			
 
				+            bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
			
 
				+        ep_bboxes_cy = np.broadcast_to(
			
 
				+            bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
			
 
				+        candidate_idxs = candidate_idxs.reshape(-1)
			
 
				+
			
 
				+        # calculate the left, top, right, bottom distance between positive
			
 
				+        # bbox center and gt side
			
 
				+        l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
			
 
				+        t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
			
 
				+        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
			
 
				+        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
			
 
				+        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
			
 
				+        is_pos = is_pos & is_in_gts
			
 
				+
			
 
				+        # if an anchor box is assigned to multiple gts,
			
 
				+        # the one with the highest IoU will be selected.
			
 
				+        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
			
 
				+        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
			
 
				+        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
			
 
				+        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
			
 
				+
			
 
				+        max_overlaps = overlaps_inf.max(axis=1)
			
 
				+        argmax_overlaps = overlaps_inf.argmax(axis=1)
			
 
				+        assigned_gt_inds[max_overlaps !=
			
 
				+                         -np.inf] = argmax_overlaps[max_overlaps !=
			
 
				+                                                    -np.inf] + 1
			
 
				+
			
 
				+        return assigned_gt_inds, max_overlaps
			
--- a/paddlers/models/ppdet/data/transform/autoaugment_utils.py
+++ b/paddlers/models/ppdet/data/transform/autoaugment_utils.py
@@ -0,0 +1,1591 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+# Reference:
			
 
				+#   https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
			
 
				+"""AutoAugment util file."""
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import inspect
			
 
				+import math
			
 
				+from PIL import Image, ImageEnhance
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+from copy import deepcopy
			
 
				+
			
 
				+# This signifies the max integer that the controller RNN could predict for the
			
 
				+# augmentation scheme.
			
 
				+_MAX_LEVEL = 10.
			
 
				+
			
 
				+# Represents an invalid bounding box that is used for checking for padding
			
 
				+# lists of bounding box coordinates for a few augmentation operations
			
 
				+_INVALID_BOX = [[-1.0, -1.0, -1.0, -1.0]]
			
 
				+
			
 
				+
			
 
				+def policy_v0():
			
 
				+    """Autoaugment policy that was used in AutoAugment Detection Paper."""
			
 
				+    # Each tuple is an augmentation operation of the form
			
 
				+    # (operation, probability, magnitude). Each element in policy is a
			
 
				+    # sub-policy that will be applied sequentially on the image.
			
 
				+    policy = [
			
 
				+        [('TranslateX_BBox', 0.6, 4), ('Equalize', 0.8, 10)],
			
 
				+        [('TranslateY_Only_BBoxes', 0.2, 2), ('Cutout', 0.8, 8)],
			
 
				+        [('Sharpness', 0.0, 8), ('ShearX_BBox', 0.4, 0)],
			
 
				+        [('ShearY_BBox', 1.0, 2), ('TranslateY_Only_BBoxes', 0.6, 6)],
			
 
				+        [('Rotate_BBox', 0.6, 10), ('Color', 1.0, 6)],
			
 
				+    ]
			
 
				+    return policy
			
 
				+
			
 
				+
			
 
				+def policy_v1():
			
 
				+    """Autoaugment policy that was used in AutoAugment Detection Paper."""
			
 
				+    # Each tuple is an augmentation operation of the form
			
 
				+    # (operation, probability, magnitude). Each element in policy is a
			
 
				+    # sub-policy that will be applied sequentially on the image.
			
 
				+    policy = [
			
 
				+        [('TranslateX_BBox', 0.6, 4), ('Equalize', 0.8, 10)],
			
 
				+        [('TranslateY_Only_BBoxes', 0.2, 2), ('Cutout', 0.8, 8)],
			
 
				+        [('Sharpness', 0.0, 8), ('ShearX_BBox', 0.4, 0)],
			
 
				+        [('ShearY_BBox', 1.0, 2), ('TranslateY_Only_BBoxes', 0.6, 6)],
			
 
				+        [('Rotate_BBox', 0.6, 10), ('Color', 1.0, 6)],
			
 
				+        [('Color', 0.0, 0), ('ShearX_Only_BBoxes', 0.8, 4)],
			
 
				+        [('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
			
 
				+        [('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
			
 
				+        [('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
			
 
				+        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # ,
			
 
				+        [('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
			
 
				+        [('Color', 1.0, 6), ('Equalize', 1.0, 2)],
			
 
				+        [('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],
			
 
				+        [('Color', 0.2, 8), ('Rotate_BBox', 0.8, 10)],
			
 
				+        [('Sharpness', 0.4, 4), ('TranslateY_Only_BBoxes', 0.0, 4)],
			
 
				+        [('Sharpness', 1.0, 4), ('SolarizeAdd', 0.4, 4)],
			
 
				+        [('Rotate_BBox', 1.0, 8), ('Sharpness', 0.2, 8)],
			
 
				+        [('ShearY_BBox', 0.6, 10), ('Equalize_Only_BBoxes', 0.6, 8)],
			
 
				+        [('ShearX_BBox', 0.2, 6), ('TranslateY_Only_BBoxes', 0.2, 10)],
			
 
				+        [('SolarizeAdd', 0.6, 8), ('Brightness', 0.8, 10)],
			
 
				+    ]
			
 
				+    return policy
			
 
				+
			
 
				+
			
 
				+def policy_vtest():
			
 
				+    """Autoaugment test policy for debugging."""
			
 
				+    # Each tuple is an augmentation operation of the form
			
 
				+    # (operation, probability, magnitude). Each element in policy is a
			
 
				+    # sub-policy that will be applied sequentially on the image.
			
 
				+    policy = [[('TranslateX_BBox', 1.0, 4), ('Equalize', 1.0, 10)], ]
			
 
				+    return policy
			
 
				+
			
 
				+
			
 
				+def policy_v2():
			
 
				+    """Additional policy that performs well on object detection."""
			
 
				+    # Each tuple is an augmentation operation of the form
			
 
				+    # (operation, probability, magnitude). Each element in policy is a
			
 
				+    # sub-policy that will be applied sequentially on the image.
			
 
				+    policy = [
			
 
				+        [('Color', 0.0, 6), ('Cutout', 0.6, 8), ('Sharpness', 0.4, 8)],
			
 
				+        [('Rotate_BBox', 0.4, 8), ('Sharpness', 0.4, 2),
			
 
				+         ('Rotate_BBox', 0.8, 10)],
			
 
				+        [('TranslateY_BBox', 1.0, 8), ('AutoContrast', 0.8, 2)],
			
 
				+        [('AutoContrast', 0.4, 6), ('ShearX_BBox', 0.8, 8),
			
 
				+         ('Brightness', 0.0, 10)],
			
 
				+        [('SolarizeAdd', 0.2, 6), ('Contrast', 0.0, 10),
			
 
				+         ('AutoContrast', 0.6, 0)],
			
 
				+        [('Cutout', 0.2, 0), ('Solarize', 0.8, 8), ('Color', 1.0, 4)],
			
 
				+        [('TranslateY_BBox', 0.0, 4), ('Equalize', 0.6, 8),
			
 
				+         ('Solarize', 0.0, 10)],
			
 
				+        [('TranslateY_BBox', 0.2, 2), ('ShearY_BBox', 0.8, 8),
			
 
				+         ('Rotate_BBox', 0.8, 8)],
			
 
				+        [('Cutout', 0.8, 8), ('Brightness', 0.8, 8), ('Cutout', 0.2, 2)],
			
 
				+        [('Color', 0.8, 4), ('TranslateY_BBox', 1.0, 6),
			
 
				+         ('Rotate_BBox', 0.6, 6)],
			
 
				+        [('Rotate_BBox', 0.6, 10), ('BBox_Cutout', 1.0, 4),
			
 
				+         ('Cutout', 0.2, 8)],
			
 
				+        [('Rotate_BBox', 0.0, 0), ('Equalize', 0.6, 6),
			
 
				+         ('ShearY_BBox', 0.6, 8)],
			
 
				+        [('Brightness', 0.8, 8), ('AutoContrast', 0.4, 2),
			
 
				+         ('Brightness', 0.2, 2)],
			
 
				+        [('TranslateY_BBox', 0.4, 8), ('Solarize', 0.4, 6),
			
 
				+         ('SolarizeAdd', 0.2, 10)],
			
 
				+        [('Contrast', 1.0, 10), ('SolarizeAdd', 0.2, 8), ('Equalize', 0.2, 4)],
			
 
				+    ]
			
 
				+    return policy
			
 
				+
			
 
				+
			
 
				+def policy_v3():
			
 
				+    """"Additional policy that performs well on object detection."""
			
 
				+    # Each tuple is an augmentation operation of the form
			
 
				+    # (operation, probability, magnitude). Each element in policy is a
			
 
				+    # sub-policy that will be applied sequentially on the image.
			
 
				+    policy = [
			
 
				+        [('Posterize', 0.8, 2), ('TranslateX_BBox', 1.0, 8)],
			
 
				+        [('BBox_Cutout', 0.2, 10), ('Sharpness', 1.0, 8)],
			
 
				+        [('Rotate_BBox', 0.6, 8), ('Rotate_BBox', 0.8, 10)],
			
 
				+        [('Equalize', 0.8, 10), ('AutoContrast', 0.2, 10)],
			
 
				+        [('SolarizeAdd', 0.2, 2), ('TranslateY_BBox', 0.2, 8)],
			
 
				+        [('Sharpness', 0.0, 2), ('Color', 0.4, 8)],
			
 
				+        [('Equalize', 1.0, 8), ('TranslateY_BBox', 1.0, 8)],
			
 
				+        [('Posterize', 0.6, 2), ('Rotate_BBox', 0.0, 10)],
			
 
				+        [('AutoContrast', 0.6, 0), ('Rotate_BBox', 1.0, 6)],
			
 
				+        [('Equalize', 0.0, 4), ('Cutout', 0.8, 10)],
			
 
				+        [('Brightness', 1.0, 2), ('TranslateY_BBox', 1.0, 6)],
			
 
				+        [('Contrast', 0.0, 2), ('ShearY_BBox', 0.8, 0)],
			
 
				+        [('AutoContrast', 0.8, 10), ('Contrast', 0.2, 10)],
			
 
				+        [('Rotate_BBox', 1.0, 10), ('Cutout', 1.0, 10)],
			
 
				+        [('SolarizeAdd', 0.8, 6), ('Equalize', 0.8, 8)],
			
 
				+    ]
			
 
				+    return policy
			
 
				+
			
 
				+
			
 
				+def _equal(val1, val2, eps=1e-8):
			
 
				+    return abs(val1 - val2) <= eps
			
 
				+
			
 
				+
			
 
				+def blend(image1, image2, factor):
			
 
				+    """Blend image1 and image2 using 'factor'.
			
 
				+
			
 
				+    Factor can be above 0.0.    A value of 0.0 means only image1 is used.
			
 
				+    A value of 1.0 means only image2 is used.    A value between 0.0 and
			
 
				+    1.0 means we linearly interpolate the pixel values between the two
			
 
				+    images.    A value greater than 1.0 "extrapolates" the difference
			
 
				+    between the two pixel values, and we clip the results to values
			
 
				+    between 0 and 255.
			
 
				+
			
 
				+    Args:
			
 
				+        image1: An image Tensor of type uint8.
			
 
				+        image2: An image Tensor of type uint8.
			
 
				+        factor: A floating point value above 0.0.
			
 
				+
			
 
				+    Returns:
			
 
				+        A blended image Tensor of type uint8.
			
 
				+    """
			
 
				+    if factor == 0.0:
			
 
				+        return image1
			
 
				+    if factor == 1.0:
			
 
				+        return image2
			
 
				+
			
 
				+    image1 = image1.astype(np.float32)
			
 
				+    image2 = image2.astype(np.float32)
			
 
				+
			
 
				+    difference = image2 - image1
			
 
				+    scaled = factor * difference
			
 
				+
			
 
				+    # Do addition in float.
			
 
				+    temp = image1 + scaled
			
 
				+
			
 
				+    # Interpolate
			
 
				+    if factor > 0.0 and factor < 1.0:
			
 
				+        # Interpolation means we always stay within 0 and 255.
			
 
				+        return temp.astype(np.uint8)
			
 
				+
			
 
				+    # Extrapolate:
			
 
				+    #
			
 
				+    # We need to clip and then cast.
			
 
				+    return np.clip(temp, a_min=0, a_max=255).astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def cutout(image, pad_size, replace=0):
			
 
				+    """Apply cutout (https://arxiv.org/abs/1708.04552) to image.
			
 
				+
			
 
				+    This operation applies a (2*pad_size x 2*pad_size) mask of zeros to
			
 
				+    a random location within `img`. The pixel values filled in will be of the
			
 
				+    value `replace`. The located where the mask will be applied is randomly
			
 
				+    chosen uniformly over the whole image.
			
 
				+
			
 
				+    Args:
			
 
				+        image: An image Tensor of type uint8.
			
 
				+        pad_size: Specifies how big the zero mask that will be generated is that
			
 
				+            is applied to the image. The mask will be of size
			
 
				+            (2*pad_size x 2*pad_size).
			
 
				+        replace: What pixel value to fill in the image in the area that has
			
 
				+            the cutout mask applied to it.
			
 
				+
			
 
				+    Returns:
			
 
				+        An image Tensor that is of type uint8.
			
 
				+    Example:
			
 
				+        img = cv2.imread( "/home/vis/gry/train/img_data/test.jpg", cv2.COLOR_BGR2RGB )
			
 
				+        new_img = cutout(img, pad_size=50, replace=0)
			
 
				+    """
			
 
				+    image_height, image_width = image.shape[0], image.shape[1]
			
 
				+
			
 
				+    cutout_center_height = np.random.randint(low=0, high=image_height)
			
 
				+    cutout_center_width = np.random.randint(low=0, high=image_width)
			
 
				+
			
 
				+    lower_pad = np.maximum(0, cutout_center_height - pad_size)
			
 
				+    upper_pad = np.maximum(0, image_height - cutout_center_height - pad_size)
			
 
				+    left_pad = np.maximum(0, cutout_center_width - pad_size)
			
 
				+    right_pad = np.maximum(0, image_width - cutout_center_width - pad_size)
			
 
				+
			
 
				+    cutout_shape = [
			
 
				+        image_height - (lower_pad + upper_pad),
			
 
				+        image_width - (left_pad + right_pad)
			
 
				+    ]
			
 
				+    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
			
 
				+    mask = np.pad(np.zeros(
			
 
				+        cutout_shape, dtype=image.dtype),
			
 
				+                  padding_dims,
			
 
				+                  'constant',
			
 
				+                  constant_values=1)
			
 
				+    mask = np.expand_dims(mask, -1)
			
 
				+    mask = np.tile(mask, [1, 1, 3])
			
 
				+    image = np.where(
			
 
				+        np.equal(mask, 0),
			
 
				+        np.ones_like(
			
 
				+            image, dtype=image.dtype) * replace,
			
 
				+        image)
			
 
				+    return image.astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def solarize(image, threshold=128):
			
 
				+    # For each pixel in the image, select the pixel
			
 
				+    # if the value is less than the threshold.
			
 
				+    # Otherwise, subtract 255 from the pixel.
			
 
				+    return np.where(image < threshold, image, 255 - image)
			
 
				+
			
 
				+
			
 
				+def solarize_add(image, addition=0, threshold=128):
			
 
				+    # For each pixel in the image less than threshold
			
 
				+    # we add 'addition' amount to it and then clip the
			
 
				+    # pixel value to be between 0 and 255. The value
			
 
				+    # of 'addition' is between -128 and 128.
			
 
				+    added_image = image.astype(np.int64) + addition
			
 
				+    added_image = np.clip(added_image, a_min=0, a_max=255).astype(np.uint8)
			
 
				+    return np.where(image < threshold, added_image, image)
			
 
				+
			
 
				+
			
 
				+def color(image, factor):
			
 
				+    """use cv2 to deal"""
			
 
				+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
			
 
				+    degenerate = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
			
 
				+    return blend(degenerate, image, factor)
			
 
				+
			
 
				+
			
 
				+# refer to https://github.com/4uiiurz1/pytorch-auto-augment/blob/024b2eac4140c38df8342f09998e307234cafc80/auto_augment.py#L197
			
 
				+def contrast(img, factor):
			
 
				+    img = ImageEnhance.Contrast(Image.fromarray(img)).enhance(factor)
			
 
				+    return np.array(img)
			
 
				+
			
 
				+
			
 
				+def brightness(image, factor):
			
 
				+    """Equivalent of PIL Brightness."""
			
 
				+    degenerate = np.zeros_like(image)
			
 
				+    return blend(degenerate, image, factor)
			
 
				+
			
 
				+
			
 
				+def posterize(image, bits):
			
 
				+    """Equivalent of PIL Posterize."""
			
 
				+    shift = 8 - bits
			
 
				+    return np.left_shift(np.right_shift(image, shift), shift)
			
 
				+
			
 
				+
			
 
				+def rotate(image, degrees, replace):
			
 
				+    """Rotates the image by degrees either clockwise or counterclockwise.
			
 
				+
			
 
				+    Args:
			
 
				+        image: An image Tensor of type uint8.
			
 
				+        degrees: Float, a scalar angle in degrees to rotate all images by. If
			
 
				+            degrees is positive the image will be rotated clockwise otherwise it will
			
 
				+            be rotated counterclockwise.
			
 
				+        replace: A one or three value 1D tensor to fill empty pixels caused by
			
 
				+            the rotate operation.
			
 
				+
			
 
				+    Returns:
			
 
				+        The rotated version of image.
			
 
				+    """
			
 
				+    image = wrap(image)
			
 
				+    image = Image.fromarray(image)
			
 
				+    image = image.rotate(degrees)
			
 
				+    image = np.array(image, dtype=np.uint8)
			
 
				+    return unwrap(image, replace)
			
 
				+
			
 
				+
			
 
				+def random_shift_bbox(image,
			
 
				+                      bbox,
			
 
				+                      pixel_scaling,
			
 
				+                      replace,
			
 
				+                      new_min_bbox_coords=None):
			
 
				+    """Move the bbox and the image content to a slightly new random location.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+            The potential values for the new min corner of the bbox will be between
			
 
				+            [old_min - pixel_scaling * bbox_height/2,
			
 
				+             old_min - pixel_scaling * bbox_height/2].
			
 
				+        pixel_scaling: A float between 0 and 1 that specifies the pixel range
			
 
				+            that the new bbox location will be sampled from.
			
 
				+        replace: A one or three value 1D tensor to fill empty pixels.
			
 
				+        new_min_bbox_coords: If not None, then this is a tuple that specifies the
			
 
				+            (min_y, min_x) coordinates of the new bbox. Normally this is randomly
			
 
				+            specified, but this allows it to be manually set. The coordinates are
			
 
				+            the absolute coordinates between 0 and image height/width and are int32.
			
 
				+
			
 
				+    Returns:
			
 
				+        The new image that will have the shifted bbox location in it along with
			
 
				+        the new bbox that contains the new coordinates.
			
 
				+    """
			
 
				+    # Obtains image height and width and create helper clip functions.
			
 
				+    image_height, image_width = image.shape[0], image.shape[1]
			
 
				+    image_height = float(image_height)
			
 
				+    image_width = float(image_width)
			
 
				+
			
 
				+    def clip_y(val):
			
 
				+        return np.clip(val, a_min=0, a_max=image_height - 1).astype(np.int32)
			
 
				+
			
 
				+    def clip_x(val):
			
 
				+        return np.clip(val, a_min=0, a_max=image_width - 1).astype(np.int32)
			
 
				+
			
 
				+    # Convert bbox to pixel coordinates.
			
 
				+    min_y = int(image_height * bbox[0])
			
 
				+    min_x = int(image_width * bbox[1])
			
 
				+    max_y = clip_y(image_height * bbox[2])
			
 
				+    max_x = clip_x(image_width * bbox[3])
			
 
				+
			
 
				+    bbox_height, bbox_width = (max_y - min_y + 1, max_x - min_x + 1)
			
 
				+    image_height = int(image_height)
			
 
				+    image_width = int(image_width)
			
 
				+
			
 
				+    # Select the new min/max bbox ranges that are used for sampling the
			
 
				+    # new min x/y coordinates of the shifted bbox.
			
 
				+    minval_y = clip_y(min_y - np.int32(pixel_scaling * float(bbox_height) /
			
 
				+                                       2.0))
			
 
				+    maxval_y = clip_y(min_y + np.int32(pixel_scaling * float(bbox_height) /
			
 
				+                                       2.0))
			
 
				+    minval_x = clip_x(min_x - np.int32(pixel_scaling * float(bbox_width) /
			
 
				+                                       2.0))
			
 
				+    maxval_x = clip_x(min_x + np.int32(pixel_scaling * float(bbox_width) /
			
 
				+                                       2.0))
			
 
				+
			
 
				+    # Sample and calculate the new unclipped min/max coordinates of the new bbox.
			
 
				+    if new_min_bbox_coords is None:
			
 
				+        unclipped_new_min_y = np.random.randint(
			
 
				+            low=minval_y, high=maxval_y, dtype=np.int32)
			
 
				+        unclipped_new_min_x = np.random.randint(
			
 
				+            low=minval_x, high=maxval_x, dtype=np.int32)
			
 
				+    else:
			
 
				+        unclipped_new_min_y, unclipped_new_min_x = (
			
 
				+            clip_y(new_min_bbox_coords[0]), clip_x(new_min_bbox_coords[1]))
			
 
				+    unclipped_new_max_y = unclipped_new_min_y + bbox_height - 1
			
 
				+    unclipped_new_max_x = unclipped_new_min_x + bbox_width - 1
			
 
				+
			
 
				+    # Determine if any of the new bbox was shifted outside the current image.
			
 
				+    # This is used for determining if any of the original bbox content should be
			
 
				+    # discarded.
			
 
				+    new_min_y, new_min_x, new_max_y, new_max_x = (
			
 
				+        clip_y(unclipped_new_min_y), clip_x(unclipped_new_min_x),
			
 
				+        clip_y(unclipped_new_max_y), clip_x(unclipped_new_max_x))
			
 
				+    shifted_min_y = (new_min_y - unclipped_new_min_y) + min_y
			
 
				+    shifted_max_y = max_y - (unclipped_new_max_y - new_max_y)
			
 
				+    shifted_min_x = (new_min_x - unclipped_new_min_x) + min_x
			
 
				+    shifted_max_x = max_x - (unclipped_new_max_x - new_max_x)
			
 
				+
			
 
				+    # Create the new bbox tensor by converting pixel integer values to floats.
			
 
				+    new_bbox = np.stack([
			
 
				+        float(new_min_y) / float(image_height), float(new_min_x) /
			
 
				+        float(image_width), float(new_max_y) / float(image_height),
			
 
				+        float(new_max_x) / float(image_width)
			
 
				+    ])
			
 
				+
			
 
				+    # Copy the contents in the bbox and fill the old bbox location
			
 
				+    # with gray (128).
			
 
				+    bbox_content = image[shifted_min_y:shifted_max_y + 1, shifted_min_x:
			
 
				+                         shifted_max_x + 1, :]
			
 
				+
			
 
				+    def mask_and_add_image(min_y_, min_x_, max_y_, max_x_, mask,
			
 
				+                           content_tensor, image_):
			
 
				+        """Applies mask to bbox region in image then adds content_tensor to it."""
			
 
				+        mask = np.pad(mask, [[min_y_, (image_height - 1) - max_y_],
			
 
				+                             [min_x_, (image_width - 1) - max_x_], [0, 0]],
			
 
				+                      'constant',
			
 
				+                      constant_values=1)
			
 
				+
			
 
				+        content_tensor = np.pad(content_tensor,
			
 
				+                                [[min_y_, (image_height - 1) - max_y_],
			
 
				+                                 [min_x_, (image_width - 1) - max_x_], [0, 0]],
			
 
				+                                'constant',
			
 
				+                                constant_values=0)
			
 
				+        return image_ * mask + content_tensor
			
 
				+
			
 
				+    # Zero out original bbox location.
			
 
				+    mask = np.zeros_like(image)[min_y:max_y + 1, min_x:max_x + 1, :]
			
 
				+    grey_tensor = np.zeros_like(mask) + replace[0]
			
 
				+    image = mask_and_add_image(min_y, min_x, max_y, max_x, mask, grey_tensor,
			
 
				+                               image)
			
 
				+
			
 
				+    # Fill in bbox content to new bbox location.
			
 
				+    mask = np.zeros_like(bbox_content)
			
 
				+    image = mask_and_add_image(new_min_y, new_min_x, new_max_y, new_max_x,
			
 
				+                               mask, bbox_content, image)
			
 
				+
			
 
				+    return image.astype(np.uint8), new_bbox
			
 
				+
			
 
				+
			
 
				+def _clip_bbox(min_y, min_x, max_y, max_x):
			
 
				+    """Clip bounding box coordinates between 0 and 1.
			
 
				+
			
 
				+    Args:
			
 
				+        min_y: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        min_x: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        max_y: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        max_x: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+
			
 
				+    Returns:
			
 
				+        Clipped coordinate values between 0 and 1.
			
 
				+    """
			
 
				+    min_y = np.clip(min_y, a_min=0, a_max=1.0)
			
 
				+    min_x = np.clip(min_x, a_min=0, a_max=1.0)
			
 
				+    max_y = np.clip(max_y, a_min=0, a_max=1.0)
			
 
				+    max_x = np.clip(max_x, a_min=0, a_max=1.0)
			
 
				+    return min_y, min_x, max_y, max_x
			
 
				+
			
 
				+
			
 
				+def _check_bbox_area(min_y, min_x, max_y, max_x, delta=0.05):
			
 
				+    """Adjusts bbox coordinates to make sure the area is > 0.
			
 
				+
			
 
				+    Args:
			
 
				+        min_y: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        min_x: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        max_y: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        max_x: Normalized bbox coordinate of type float between 0 and 1.
			
 
				+        delta: Float, this is used to create a gap of size 2 * delta between
			
 
				+            bbox min/max coordinates that are the same on the boundary.
			
 
				+            This prevents the bbox from having an area of zero.
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple of new bbox coordinates between 0 and 1 that will now have a
			
 
				+        guaranteed area > 0.
			
 
				+    """
			
 
				+    height = max_y - min_y
			
 
				+    width = max_x - min_x
			
 
				+
			
 
				+    def _adjust_bbox_boundaries(min_coord, max_coord):
			
 
				+        # Make sure max is never 0 and min is never 1.
			
 
				+        max_coord = np.maximum(max_coord, 0.0 + delta)
			
 
				+        min_coord = np.minimum(min_coord, 1.0 - delta)
			
 
				+        return min_coord, max_coord
			
 
				+
			
 
				+    if _equal(height, 0):
			
 
				+        min_y, max_y = _adjust_bbox_boundaries(min_y, max_y)
			
 
				+
			
 
				+    if _equal(width, 0):
			
 
				+        min_x, max_x = _adjust_bbox_boundaries(min_x, max_x)
			
 
				+
			
 
				+    return min_y, min_x, max_y, max_x
			
 
				+
			
 
				+
			
 
				+def _scale_bbox_only_op_probability(prob):
			
 
				+    """Reduce the probability of the bbox-only operation.
			
 
				+
			
 
				+    Probability is reduced so that we do not distort the content of too many
			
 
				+    bounding boxes that are close to each other. The value of 3.0 was a chosen
			
 
				+    hyper parameter when designing the autoaugment algorithm that we found
			
 
				+    empirically to work well.
			
 
				+
			
 
				+    Args:
			
 
				+        prob: Float that is the probability of applying the bbox-only operation.
			
 
				+
			
 
				+    Returns:
			
 
				+        Reduced probability.
			
 
				+    """
			
 
				+    return prob / 3.0
			
 
				+
			
 
				+
			
 
				+def _apply_bbox_augmentation(image, bbox, augmentation_func, *args):
			
 
				+    """Applies augmentation_func to the subsection of image indicated by bbox.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        augmentation_func: Augmentation function that will be applied to the
			
 
				+            subsection of image.
			
 
				+        *args: Additional parameters that will be passed into augmentation_func
			
 
				+            when it is called.
			
 
				+
			
 
				+    Returns:
			
 
				+        A modified version of image, where the bbox location in the image will
			
 
				+        have `ugmentation_func applied to it.
			
 
				+    """
			
 
				+    image_height = image.shape[0]
			
 
				+    image_width = image.shape[1]
			
 
				+
			
 
				+    min_y = int(image_height * bbox[0])
			
 
				+    min_x = int(image_width * bbox[1])
			
 
				+    max_y = int(image_height * bbox[2])
			
 
				+    max_x = int(image_width * bbox[3])
			
 
				+
			
 
				+    # Clip to be sure the max values do not fall out of range.
			
 
				+    max_y = np.minimum(max_y, image_height - 1)
			
 
				+    max_x = np.minimum(max_x, image_width - 1)
			
 
				+
			
 
				+    # Get the sub-tensor that is the image within the bounding box region.
			
 
				+    bbox_content = image[min_y:max_y + 1, min_x:max_x + 1, :]
			
 
				+
			
 
				+    # Apply the augmentation function to the bbox portion of the image.
			
 
				+    augmented_bbox_content = augmentation_func(bbox_content, *args)
			
 
				+
			
 
				+    # Pad the augmented_bbox_content and the mask to match the shape of original
			
 
				+    # image.
			
 
				+    augmented_bbox_content = np.pad(
			
 
				+        augmented_bbox_content, [[min_y, (image_height - 1) - max_y],
			
 
				+                                 [min_x, (image_width - 1) - max_x], [0, 0]],
			
 
				+        'constant',
			
 
				+        constant_values=1)
			
 
				+
			
 
				+    # Create a mask that will be used to zero out a part of the original image.
			
 
				+    mask_tensor = np.zeros_like(bbox_content)
			
 
				+
			
 
				+    mask_tensor = np.pad(mask_tensor,
			
 
				+                         [[min_y, (image_height - 1) - max_y],
			
 
				+                          [min_x, (image_width - 1) - max_x], [0, 0]],
			
 
				+                         'constant',
			
 
				+                         constant_values=1)
			
 
				+    # Replace the old bbox content with the new augmented content.
			
 
				+    image = image * mask_tensor + augmented_bbox_content
			
 
				+    return image.astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def _concat_bbox(bbox, bboxes):
			
 
				+    """Helper function that concates bbox to bboxes along the first dimension."""
			
 
				+
			
 
				+    # Note if all elements in bboxes are -1 (_INVALID_BOX), then this means
			
 
				+    # we discard bboxes and start the bboxes Tensor with the current bbox.
			
 
				+    bboxes_sum_check = np.sum(bboxes)
			
 
				+    bbox = np.expand_dims(bbox, 0)
			
 
				+    # This check will be true when it is an _INVALID_BOX
			
 
				+    if _equal(bboxes_sum_check, -4):
			
 
				+        bboxes = bbox
			
 
				+    else:
			
 
				+        bboxes = np.concatenate([bboxes, bbox], 0)
			
 
				+    return bboxes
			
 
				+
			
 
				+
			
 
				+def _apply_bbox_augmentation_wrapper(image, bbox, new_bboxes, prob,
			
 
				+                                     augmentation_func, func_changes_bbox,
			
 
				+                                     *args):
			
 
				+    """Applies _apply_bbox_augmentation with probability prob.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        new_bboxes: 2D Tensor that is a list of the bboxes in the image after they
			
 
				+            have been altered by aug_func. These will only be changed when
			
 
				+            func_changes_bbox is set to true. Each bbox has 4 elements
			
 
				+            (min_y, min_x, max_y, max_x) of type float that are the normalized
			
 
				+            bbox coordinates between 0 and 1.
			
 
				+        prob: Float that is the probability of applying _apply_bbox_augmentation.
			
 
				+        augmentation_func: Augmentation function that will be applied to the
			
 
				+            subsection of image.
			
 
				+        func_changes_bbox: Boolean. Does augmentation_func return bbox in addition
			
 
				+            to image.
			
 
				+        *args: Additional parameters that will be passed into augmentation_func
			
 
				+            when it is called.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple. Fist element is a modified version of image, where the bbox
			
 
				+        location in the image will have augmentation_func applied to it if it is
			
 
				+        chosen to be called with probability `prob`. The second element is a
			
 
				+        Tensor of Tensors of length 4 that will contain the altered bbox after
			
 
				+        applying augmentation_func.
			
 
				+    """
			
 
				+    should_apply_op = (np.random.rand() + prob >= 1)
			
 
				+    if func_changes_bbox:
			
 
				+        if should_apply_op:
			
 
				+            augmented_image, bbox = augmentation_func(image, bbox, *args)
			
 
				+        else:
			
 
				+            augmented_image, bbox = (image, bbox)
			
 
				+    else:
			
 
				+        if should_apply_op:
			
 
				+            augmented_image = _apply_bbox_augmentation(
			
 
				+                image, bbox, augmentation_func, *args)
			
 
				+        else:
			
 
				+            augmented_image = image
			
 
				+    new_bboxes = _concat_bbox(bbox, new_bboxes)
			
 
				+    return augmented_image.astype(np.uint8), new_bboxes
			
 
				+
			
 
				+
			
 
				+def _apply_multi_bbox_augmentation(image, bboxes, prob, aug_func,
			
 
				+                                   func_changes_bbox, *args):
			
 
				+    """Applies aug_func to the image for each bbox in bboxes.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
			
 
				+            has 4 elements (min_y, min_x, max_y, max_x) of type float.
			
 
				+        prob: Float that is the probability of applying aug_func to a specific
			
 
				+            bounding box within the image.
			
 
				+        aug_func: Augmentation function that will be applied to the
			
 
				+            subsections of image indicated by the bbox values in bboxes.
			
 
				+        func_changes_bbox: Boolean. Does augmentation_func return bbox in addition
			
 
				+            to image.
			
 
				+        *args: Additional parameters that will be passed into augmentation_func
			
 
				+            when it is called.
			
 
				+
			
 
				+    Returns:
			
 
				+        A modified version of image, where each bbox location in the image will
			
 
				+        have augmentation_func applied to it if it is chosen to be called with
			
 
				+        probability prob independently across all bboxes. Also the final
			
 
				+        bboxes are returned that will be unchanged if func_changes_bbox is set to
			
 
				+        false and if true, the new altered ones will be returned.
			
 
				+    """
			
 
				+    # Will keep track of the new altered bboxes after aug_func is repeatedly
			
 
				+    # applied. The -1 values are a dummy value and this first Tensor will be
			
 
				+    # removed upon appending the first real bbox.
			
 
				+    new_bboxes = np.array(_INVALID_BOX)
			
 
				+
			
 
				+    # If the bboxes are empty, then just give it _INVALID_BOX. The result
			
 
				+    # will be thrown away.
			
 
				+    bboxes = np.array((_INVALID_BOX)) if bboxes.size == 0 else bboxes
			
 
				+
			
 
				+    assert bboxes.shape[1] == 4, "bboxes.shape[1] must be 4!!!!"
			
 
				+
			
 
				+    # pylint:disable=g-long-lambda
			
 
				+    # pylint:disable=line-too-long
			
 
				+    wrapped_aug_func = lambda _image, bbox, _new_bboxes: _apply_bbox_augmentation_wrapper(_image, bbox, _new_bboxes, prob, aug_func, func_changes_bbox, *args)
			
 
				+    # pylint:enable=g-long-lambda
			
 
				+    # pylint:enable=line-too-long
			
 
				+
			
 
				+    # Setup the while_loop.
			
 
				+    num_bboxes = bboxes.shape[0]  # We loop until we go over all bboxes.
			
 
				+    idx = 0  # Counter for the while loop.
			
 
				+
			
 
				+    # Conditional function when to end the loop once we go over all bboxes
			
 
				+    # images_and_bboxes contain (_image, _new_bboxes)
			
 
				+    def cond(_idx, _images_and_bboxes):
			
 
				+        return _idx < num_bboxes
			
 
				+
			
 
				+    # Shuffle the bboxes so that the augmentation order is not deterministic if
			
 
				+    # we are not changing the bboxes with aug_func.
			
 
				+    # if not func_changes_bbox:
			
 
				+    #     print(bboxes)
			
 
				+    #     loop_bboxes = np.take(bboxes,np.random.permutation(bboxes.shape[0]),axis=0)
			
 
				+    #     print(loop_bboxes)
			
 
				+    # else:
			
 
				+    #     loop_bboxes = bboxes
			
 
				+    # we can not shuffle the bbox because it does not contain class information here
			
 
				+    loop_bboxes = deepcopy(bboxes)
			
 
				+
			
 
				+    # Main function of while_loop where we repeatedly apply augmentation on the
			
 
				+    # bboxes in the image.
			
 
				+    # pylint:disable=g-long-lambda
			
 
				+    body = lambda _idx, _images_and_bboxes: [
			
 
				+            _idx + 1, wrapped_aug_func(_images_and_bboxes[0],
			
 
				+                                         loop_bboxes[_idx],
			
 
				+                                         _images_and_bboxes[1])]
			
 
				+    while (cond(idx, (image, new_bboxes))):
			
 
				+        idx, (image, new_bboxes) = body(idx, (image, new_bboxes))
			
 
				+
			
 
				+    # Either return the altered bboxes or the original ones depending on if
			
 
				+    # we altered them in anyway.
			
 
				+    if func_changes_bbox:
			
 
				+        final_bboxes = new_bboxes
			
 
				+    else:
			
 
				+        final_bboxes = bboxes
			
 
				+    return image, final_bboxes
			
 
				+
			
 
				+
			
 
				+def _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob, aug_func,
			
 
				+                                           func_changes_bbox, *args):
			
 
				+    """Checks to be sure num bboxes > 0 before calling inner function."""
			
 
				+    num_bboxes = len(bboxes)
			
 
				+    new_image = deepcopy(image)
			
 
				+    new_bboxes = deepcopy(bboxes)
			
 
				+    if num_bboxes != 0:
			
 
				+        new_image, new_bboxes = _apply_multi_bbox_augmentation(
			
 
				+            new_image, new_bboxes, prob, aug_func, func_changes_bbox, *args)
			
 
				+    return new_image, new_bboxes
			
 
				+
			
 
				+
			
 
				+def rotate_only_bboxes(image, bboxes, prob, degrees, replace):
			
 
				+    """Apply rotate to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, rotate, func_changes_bbox, degrees, replace)
			
 
				+
			
 
				+
			
 
				+def shear_x_only_bboxes(image, bboxes, prob, level, replace):
			
 
				+    """Apply shear_x to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, shear_x, func_changes_bbox, level, replace)
			
 
				+
			
 
				+
			
 
				+def shear_y_only_bboxes(image, bboxes, prob, level, replace):
			
 
				+    """Apply shear_y to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, shear_y, func_changes_bbox, level, replace)
			
 
				+
			
 
				+
			
 
				+def translate_x_only_bboxes(image, bboxes, prob, pixels, replace):
			
 
				+    """Apply translate_x to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, translate_x, func_changes_bbox, pixels, replace)
			
 
				+
			
 
				+
			
 
				+def translate_y_only_bboxes(image, bboxes, prob, pixels, replace):
			
 
				+    """Apply translate_y to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, translate_y, func_changes_bbox, pixels, replace)
			
 
				+
			
 
				+
			
 
				+def flip_only_bboxes(image, bboxes, prob):
			
 
				+    """Apply flip_lr to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob,
			
 
				+                                                  np.fliplr, func_changes_bbox)
			
 
				+
			
 
				+
			
 
				+def solarize_only_bboxes(image, bboxes, prob, threshold):
			
 
				+    """Apply solarize to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, solarize, func_changes_bbox, threshold)
			
 
				+
			
 
				+
			
 
				+def equalize_only_bboxes(image, bboxes, prob):
			
 
				+    """Apply equalize to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob,
			
 
				+                                                  equalize, func_changes_bbox)
			
 
				+
			
 
				+
			
 
				+def cutout_only_bboxes(image, bboxes, prob, pad_size, replace):
			
 
				+    """Apply cutout to each bbox in the image with probability prob."""
			
 
				+    func_changes_bbox = False
			
 
				+    prob = _scale_bbox_only_op_probability(prob)
			
 
				+    return _apply_multi_bbox_augmentation_wrapper(
			
 
				+        image, bboxes, prob, cutout, func_changes_bbox, pad_size, replace)
			
 
				+
			
 
				+
			
 
				+def _rotate_bbox(bbox, image_height, image_width, degrees):
			
 
				+    """Rotates the bbox coordinated by degrees.
			
 
				+
			
 
				+    Args:
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        image_height: Int, height of the image.
			
 
				+        image_width: Int, height of the image.
			
 
				+        degrees: Float, a scalar angle in degrees to rotate all images by. If
			
 
				+            degrees is positive the image will be rotated clockwise otherwise it will
			
 
				+            be rotated counterclockwise.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tensor of the same shape as bbox, but now with the rotated coordinates.
			
 
				+    """
			
 
				+    image_height, image_width = (float(image_height), float(image_width))
			
 
				+
			
 
				+    # Convert from degrees to radians.
			
 
				+    degrees_to_radians = math.pi / 180.0
			
 
				+    radians = degrees * degrees_to_radians
			
 
				+
			
 
				+    # Translate the bbox to the center of the image and turn the normalized 0-1
			
 
				+    # coordinates to absolute pixel locations.
			
 
				+    # Y coordinates are made negative as the y axis of images goes down with
			
 
				+    # increasing pixel values, so we negate to make sure x axis and y axis points
			
 
				+    # are in the traditionally positive direction.
			
 
				+    min_y = -int(image_height * (bbox[0] - 0.5))
			
 
				+    min_x = int(image_width * (bbox[1] - 0.5))
			
 
				+    max_y = -int(image_height * (bbox[2] - 0.5))
			
 
				+    max_x = int(image_width * (bbox[3] - 0.5))
			
 
				+    coordinates = np.stack([[min_y, min_x], [min_y, max_x], [max_y, min_x],
			
 
				+                            [max_y, max_x]]).astype(np.float32)
			
 
				+    # Rotate the coordinates according to the rotation matrix clockwise if
			
 
				+    # radians is positive, else negative
			
 
				+    rotation_matrix = np.stack([[math.cos(radians), math.sin(radians)],
			
 
				+                                [-math.sin(radians), math.cos(radians)]])
			
 
				+    new_coords = np.matmul(rotation_matrix,
			
 
				+                           np.transpose(coordinates)).astype(np.int32)
			
 
				+
			
 
				+    # Find min/max values and convert them back to normalized 0-1 floats.
			
 
				+    min_y = -(float(np.max(new_coords[0, :])) / image_height - 0.5)
			
 
				+    min_x = float(np.min(new_coords[1, :])) / image_width + 0.5
			
 
				+    max_y = -(float(np.min(new_coords[0, :])) / image_height - 0.5)
			
 
				+    max_x = float(np.max(new_coords[1, :])) / image_width + 0.5
			
 
				+
			
 
				+    # Clip the bboxes to be sure the fall between [0, 1].
			
 
				+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
			
 
				+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
			
 
				+    return np.stack([min_y, min_x, max_y, max_x])
			
 
				+
			
 
				+
			
 
				+def rotate_with_bboxes(image, bboxes, degrees, replace):
			
 
				+    # Rotate the image.
			
 
				+    image = rotate(image, degrees, replace)
			
 
				+
			
 
				+    # Convert bbox coordinates to pixel values.
			
 
				+    image_height, image_width = image.shape[:2]
			
 
				+    # pylint:disable=g-long-lambda
			
 
				+    wrapped_rotate_bbox = lambda bbox: _rotate_bbox(bbox, image_height, image_width, degrees)
			
 
				+    # pylint:enable=g-long-lambda
			
 
				+    new_bboxes = np.zeros_like(bboxes)
			
 
				+    for idx in range(len(bboxes)):
			
 
				+        new_bboxes[idx] = wrapped_rotate_bbox(bboxes[idx])
			
 
				+    return image, new_bboxes
			
 
				+
			
 
				+
			
 
				+def translate_x(image, pixels, replace):
			
 
				+    """Equivalent of PIL Translate in X dimension."""
			
 
				+    image = Image.fromarray(wrap(image))
			
 
				+    image = image.transform(image.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0))
			
 
				+    return unwrap(np.array(image), replace)
			
 
				+
			
 
				+
			
 
				+def translate_y(image, pixels, replace):
			
 
				+    """Equivalent of PIL Translate in Y dimension."""
			
 
				+    image = Image.fromarray(wrap(image))
			
 
				+    image = image.transform(image.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels))
			
 
				+    return unwrap(np.array(image), replace)
			
 
				+
			
 
				+
			
 
				+def _shift_bbox(bbox, image_height, image_width, pixels, shift_horizontal):
			
 
				+    """Shifts the bbox coordinates by pixels.
			
 
				+
			
 
				+    Args:
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        image_height: Int, height of the image.
			
 
				+        image_width: Int, width of the image.
			
 
				+        pixels: An int. How many pixels to shift the bbox.
			
 
				+        shift_horizontal: Boolean. If true then shift in X dimension else shift in
			
 
				+            Y dimension.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tensor of the same shape as bbox, but now with the shifted coordinates.
			
 
				+    """
			
 
				+    pixels = int(pixels)
			
 
				+    # Convert bbox to integer pixel locations.
			
 
				+    min_y = int(float(image_height) * bbox[0])
			
 
				+    min_x = int(float(image_width) * bbox[1])
			
 
				+    max_y = int(float(image_height) * bbox[2])
			
 
				+    max_x = int(float(image_width) * bbox[3])
			
 
				+
			
 
				+    if shift_horizontal:
			
 
				+        min_x = np.maximum(0, min_x - pixels)
			
 
				+        max_x = np.minimum(image_width, max_x - pixels)
			
 
				+    else:
			
 
				+        min_y = np.maximum(0, min_y - pixels)
			
 
				+        max_y = np.minimum(image_height, max_y - pixels)
			
 
				+
			
 
				+    # Convert bbox back to floats.
			
 
				+    min_y = float(min_y) / float(image_height)
			
 
				+    min_x = float(min_x) / float(image_width)
			
 
				+    max_y = float(max_y) / float(image_height)
			
 
				+    max_x = float(max_x) / float(image_width)
			
 
				+
			
 
				+    # Clip the bboxes to be sure the fall between [0, 1].
			
 
				+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
			
 
				+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
			
 
				+    return np.stack([min_y, min_x, max_y, max_x])
			
 
				+
			
 
				+
			
 
				+def translate_bbox(image, bboxes, pixels, replace, shift_horizontal):
			
 
				+    """Equivalent of PIL Translate in X/Y dimension that shifts image and bbox.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
			
 
				+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
			
 
				+            between [0, 1].
			
 
				+        pixels: An int. How many pixels to shift the image and bboxes
			
 
				+        replace: A one or three value 1D tensor to fill empty pixels.
			
 
				+        shift_horizontal: Boolean. If true then shift in X dimension else shift in
			
 
				+            Y dimension.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple containing a 3D uint8 Tensor that will be the result of translating
			
 
				+        image by pixels. The second element of the tuple is bboxes, where now
			
 
				+        the coordinates will be shifted to reflect the shifted image.
			
 
				+    """
			
 
				+    if shift_horizontal:
			
 
				+        image = translate_x(image, pixels, replace)
			
 
				+    else:
			
 
				+        image = translate_y(image, pixels, replace)
			
 
				+
			
 
				+    # Convert bbox coordinates to pixel values.
			
 
				+    image_height, image_width = image.shape[0], image.shape[1]
			
 
				+    # pylint:disable=g-long-lambda
			
 
				+    wrapped_shift_bbox = lambda bbox: _shift_bbox(bbox, image_height, image_width, pixels, shift_horizontal)
			
 
				+    # pylint:enable=g-long-lambda
			
 
				+    new_bboxes = deepcopy(bboxes)
			
 
				+    num_bboxes = len(bboxes)
			
 
				+    for idx in range(num_bboxes):
			
 
				+        new_bboxes[idx] = wrapped_shift_bbox(bboxes[idx])
			
 
				+    return image.astype(np.uint8), new_bboxes
			
 
				+
			
 
				+
			
 
				+def shear_x(image, level, replace):
			
 
				+    """Equivalent of PIL Shearing in X dimension."""
			
 
				+    # Shear parallel to x axis is a projective transform
			
 
				+    # with a matrix form of:
			
 
				+    # [1    level
			
 
				+    #    0    1].
			
 
				+    image = Image.fromarray(wrap(image))
			
 
				+    image = image.transform(image.size, Image.AFFINE, (1, level, 0, 0, 1, 0))
			
 
				+    return unwrap(np.array(image), replace)
			
 
				+
			
 
				+
			
 
				+def shear_y(image, level, replace):
			
 
				+    """Equivalent of PIL Shearing in Y dimension."""
			
 
				+    # Shear parallel to y axis is a projective transform
			
 
				+    # with a matrix form of:
			
 
				+    # [1    0
			
 
				+    #    level    1].
			
 
				+    image = Image.fromarray(wrap(image))
			
 
				+    image = image.transform(image.size, Image.AFFINE, (1, 0, 0, level, 1, 0))
			
 
				+    return unwrap(np.array(image), replace)
			
 
				+
			
 
				+
			
 
				+def _shear_bbox(bbox, image_height, image_width, level, shear_horizontal):
			
 
				+    """Shifts the bbox according to how the image was sheared.
			
 
				+
			
 
				+    Args:
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        image_height: Int, height of the image.
			
 
				+        image_width: Int, height of the image.
			
 
				+        level: Float. How much to shear the image.
			
 
				+        shear_horizontal: If true then shear in X dimension else shear in
			
 
				+            the Y dimension.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tensor of the same shape as bbox, but now with the shifted coordinates.
			
 
				+    """
			
 
				+    image_height, image_width = (float(image_height), float(image_width))
			
 
				+
			
 
				+    # Change bbox coordinates to be pixels.
			
 
				+    min_y = int(image_height * bbox[0])
			
 
				+    min_x = int(image_width * bbox[1])
			
 
				+    max_y = int(image_height * bbox[2])
			
 
				+    max_x = int(image_width * bbox[3])
			
 
				+    coordinates = np.stack(
			
 
				+        [[min_y, min_x], [min_y, max_x], [max_y, min_x], [max_y, max_x]])
			
 
				+    coordinates = coordinates.astype(np.float32)
			
 
				+
			
 
				+    # Shear the coordinates according to the translation matrix.
			
 
				+    if shear_horizontal:
			
 
				+        translation_matrix = np.stack([[1, 0], [-level, 1]])
			
 
				+    else:
			
 
				+        translation_matrix = np.stack([[1, -level], [0, 1]])
			
 
				+    translation_matrix = translation_matrix.astype(np.float32)
			
 
				+    new_coords = np.matmul(translation_matrix,
			
 
				+                           np.transpose(coordinates)).astype(np.int32)
			
 
				+
			
 
				+    # Find min/max values and convert them back to floats.
			
 
				+    min_y = float(np.min(new_coords[0, :])) / image_height
			
 
				+    min_x = float(np.min(new_coords[1, :])) / image_width
			
 
				+    max_y = float(np.max(new_coords[0, :])) / image_height
			
 
				+    max_x = float(np.max(new_coords[1, :])) / image_width
			
 
				+
			
 
				+    # Clip the bboxes to be sure the fall between [0, 1].
			
 
				+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
			
 
				+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
			
 
				+    return np.stack([min_y, min_x, max_y, max_x])
			
 
				+
			
 
				+
			
 
				+def shear_with_bboxes(image, bboxes, level, replace, shear_horizontal):
			
 
				+    """Applies Shear Transformation to the image and shifts the bboxes.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
			
 
				+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
			
 
				+            between [0, 1].
			
 
				+        level: Float. How much to shear the image. This value will be between
			
 
				+            -0.3 to 0.3.
			
 
				+        replace: A one or three value 1D tensor to fill empty pixels.
			
 
				+        shear_horizontal: Boolean. If true then shear in X dimension else shear in
			
 
				+            the Y dimension.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple containing a 3D uint8 Tensor that will be the result of shearing
			
 
				+        image by level. The second element of the tuple is bboxes, where now
			
 
				+        the coordinates will be shifted to reflect the sheared image.
			
 
				+    """
			
 
				+    if shear_horizontal:
			
 
				+        image = shear_x(image, level, replace)
			
 
				+    else:
			
 
				+        image = shear_y(image, level, replace)
			
 
				+
			
 
				+    # Convert bbox coordinates to pixel values.
			
 
				+    image_height, image_width = image.shape[:2]
			
 
				+    # pylint:disable=g-long-lambda
			
 
				+    wrapped_shear_bbox = lambda bbox: _shear_bbox(bbox, image_height, image_width, level, shear_horizontal)
			
 
				+    # pylint:enable=g-long-lambda
			
 
				+    new_bboxes = deepcopy(bboxes)
			
 
				+    num_bboxes = len(bboxes)
			
 
				+    for idx in range(num_bboxes):
			
 
				+        new_bboxes[idx] = wrapped_shear_bbox(bboxes[idx])
			
 
				+    return image.astype(np.uint8), new_bboxes
			
 
				+
			
 
				+
			
 
				+def autocontrast(image):
			
 
				+    """Implements Autocontrast function from PIL.
			
 
				+
			
 
				+    Args:
			
 
				+        image: A 3D uint8 tensor.
			
 
				+
			
 
				+    Returns:
			
 
				+        The image after it has had autocontrast applied to it and will be of type
			
 
				+        uint8.
			
 
				+    """
			
 
				+
			
 
				+    def scale_channel(image):
			
 
				+        """Scale the 2D image using the autocontrast rule."""
			
 
				+        # A possibly cheaper version can be done using cumsum/unique_with_counts
			
 
				+        # over the histogram values, rather than iterating over the entire image.
			
 
				+        # to compute mins and maxes.
			
 
				+        lo = float(np.min(image))
			
 
				+        hi = float(np.max(image))
			
 
				+
			
 
				+        # Scale the image, making the lowest value 0 and the highest value 255.
			
 
				+        def scale_values(im):
			
 
				+            scale = 255.0 / (hi - lo)
			
 
				+            offset = -lo * scale
			
 
				+            im = im.astype(np.float32) * scale + offset
			
 
				+            img = np.clip(im, a_min=0, a_max=255.0)
			
 
				+            return im.astype(np.uint8)
			
 
				+
			
 
				+        result = scale_values(image) if hi > lo else image
			
 
				+        return result
			
 
				+
			
 
				+    # Assumes RGB for now.    Scales each channel independently
			
 
				+    # and then stacks the result.
			
 
				+    s1 = scale_channel(image[:, :, 0])
			
 
				+    s2 = scale_channel(image[:, :, 1])
			
 
				+    s3 = scale_channel(image[:, :, 2])
			
 
				+    image = np.stack([s1, s2, s3], 2)
			
 
				+    return image
			
 
				+
			
 
				+
			
 
				+def sharpness(image, factor):
			
 
				+    """Implements Sharpness function from PIL."""
			
 
				+    orig_image = image
			
 
				+    image = image.astype(np.float32)
			
 
				+    # Make image 4D for conv operation.
			
 
				+    # SMOOTH PIL Kernel.
			
 
				+    kernel = np.array(
			
 
				+        [[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=np.float32) / 13.
			
 
				+    result = cv2.filter2D(image, -1, kernel).astype(np.uint8)
			
 
				+
			
 
				+    # Blend the final result.
			
 
				+    return blend(result, orig_image, factor)
			
 
				+
			
 
				+
			
 
				+def equalize(image):
			
 
				+    """Implements Equalize function from PIL using."""
			
 
				+
			
 
				+    def scale_channel(im, c):
			
 
				+        """Scale the data in the channel to implement equalize."""
			
 
				+        im = im[:, :, c].astype(np.int32)
			
 
				+        # Compute the histogram of the image channel.
			
 
				+        histo, _ = np.histogram(im, range=[0, 255], bins=256)
			
 
				+
			
 
				+        # For the purposes of computing the step, filter out the nonzeros.
			
 
				+        nonzero = np.where(np.not_equal(histo, 0))
			
 
				+        nonzero_histo = np.reshape(np.take(histo, nonzero), [-1])
			
 
				+        step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255
			
 
				+
			
 
				+        def build_lut(histo, step):
			
 
				+            # Compute the cumulative sum, shifting by step // 2
			
 
				+            # and then normalization by step.
			
 
				+            lut = (np.cumsum(histo) + (step // 2)) // step
			
 
				+            # Shift lut, prepending with 0.
			
 
				+            lut = np.concatenate([[0], lut[:-1]], 0)
			
 
				+            # Clip the counts to be in range.    This is done
			
 
				+            # in the C code for image.point.
			
 
				+            return np.clip(lut, a_min=0, a_max=255).astype(np.uint8)
			
 
				+
			
 
				+        # If step is zero, return the original image.    Otherwise, build
			
 
				+        # lut from the full histogram and step and then index from it.
			
 
				+        if step == 0:
			
 
				+            result = im
			
 
				+        else:
			
 
				+            result = np.take(build_lut(histo, step), im)
			
 
				+
			
 
				+        return result.astype(np.uint8)
			
 
				+
			
 
				+    # Assumes RGB for now.    Scales each channel independently
			
 
				+    # and then stacks the result.
			
 
				+    s1 = scale_channel(image, 0)
			
 
				+    s2 = scale_channel(image, 1)
			
 
				+    s3 = scale_channel(image, 2)
			
 
				+    image = np.stack([s1, s2, s3], 2)
			
 
				+    return image
			
 
				+
			
 
				+
			
 
				+def wrap(image):
			
 
				+    """Returns 'image' with an extra channel set to all 1s."""
			
 
				+    shape = image.shape
			
 
				+    extended_channel = 255 * np.ones([shape[0], shape[1], 1], image.dtype)
			
 
				+    extended = np.concatenate([image, extended_channel], 2).astype(image.dtype)
			
 
				+    return extended
			
 
				+
			
 
				+
			
 
				+def unwrap(image, replace):
			
 
				+    """Unwraps an image produced by wrap.
			
 
				+
			
 
				+    Where there is a 0 in the last channel for every spatial position,
			
 
				+    the rest of the three channels in that spatial dimension are grayed
			
 
				+    (set to 128).    Operations like translate and shear on a wrapped
			
 
				+    Tensor will leave 0s in empty locations.    Some transformations look
			
 
				+    at the intensity of values to do preprocessing, and we want these
			
 
				+    empty pixels to assume the 'average' value, rather than pure black.
			
 
				+
			
 
				+
			
 
				+    Args:
			
 
				+        image: A 3D Image Tensor with 4 channels.
			
 
				+        replace: A one or three value 1D tensor to fill empty pixels.
			
 
				+
			
 
				+    Returns:
			
 
				+        image: A 3D image Tensor with 3 channels.
			
 
				+    """
			
 
				+    image_shape = image.shape
			
 
				+    # Flatten the spatial dimensions.
			
 
				+    flattened_image = np.reshape(image, [-1, image_shape[2]])
			
 
				+
			
 
				+    # Find all pixels where the last channel is zero.
			
 
				+    alpha_channel = flattened_image[:, 3]
			
 
				+
			
 
				+    replace = np.concatenate([replace, np.ones([1], image.dtype)], 0)
			
 
				+
			
 
				+    # Where they are zero, fill them in with 'replace'.
			
 
				+    alpha_channel = np.reshape(alpha_channel, (-1, 1))
			
 
				+    alpha_channel = np.tile(alpha_channel, reps=(1, flattened_image.shape[1]))
			
 
				+
			
 
				+    flattened_image = np.where(
			
 
				+        np.equal(alpha_channel, 0),
			
 
				+        np.ones_like(
			
 
				+            flattened_image, dtype=image.dtype) * replace,
			
 
				+        flattened_image)
			
 
				+
			
 
				+    image = np.reshape(flattened_image, image_shape)
			
 
				+    image = image[:, :, :3]
			
 
				+    return image.astype(np.uint8)
			
 
				+
			
 
				+
			
 
				+def _cutout_inside_bbox(image, bbox, pad_fraction):
			
 
				+    """Generates cutout mask and the mean pixel value of the bbox.
			
 
				+
			
 
				+    First a location is randomly chosen within the image as the center where the
			
 
				+    cutout mask will be applied. Note this can be towards the boundaries of the
			
 
				+    image, so the full cutout mask may not be applied.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
			
 
				+            of type float that represents the normalized coordinates between 0 and 1.
			
 
				+        pad_fraction: Float that specifies how large the cutout mask should be in
			
 
				+            in reference to the size of the original bbox. If pad_fraction is 0.25,
			
 
				+            then the cutout mask will be of shape
			
 
				+            (0.25 * bbox height, 0.25 * bbox width).
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple. Fist element is a tensor of the same shape as image where each
			
 
				+        element is either a 1 or 0 that is used to determine where the image
			
 
				+        will have cutout applied. The second element is the mean of the pixels
			
 
				+        in the image where the bbox is located.
			
 
				+        mask value: [0,1]
			
 
				+    """
			
 
				+    image_height, image_width = image.shape[0], image.shape[1]
			
 
				+    # Transform from shape [1, 4] to [4].
			
 
				+    bbox = np.squeeze(bbox)
			
 
				+
			
 
				+    min_y = int(float(image_height) * bbox[0])
			
 
				+    min_x = int(float(image_width) * bbox[1])
			
 
				+    max_y = int(float(image_height) * bbox[2])
			
 
				+    max_x = int(float(image_width) * bbox[3])
			
 
				+
			
 
				+    # Calculate the mean pixel values in the bounding box, which will be used
			
 
				+    # to fill the cutout region.
			
 
				+    mean = np.mean(image[min_y:max_y + 1, min_x:max_x + 1], axis=(0, 1))
			
 
				+    # Cutout mask will be size pad_size_heigh * 2 by pad_size_width * 2 if the
			
 
				+    # region lies entirely within the bbox.
			
 
				+    box_height = max_y - min_y + 1
			
 
				+    box_width = max_x - min_x + 1
			
 
				+    pad_size_height = int(pad_fraction * (box_height / 2))
			
 
				+    pad_size_width = int(pad_fraction * (box_width / 2))
			
 
				+
			
 
				+    # Sample the center location in the image where the zero mask will be applied.
			
 
				+    cutout_center_height = np.random.randint(min_y, max_y + 1, dtype=np.int32)
			
 
				+    cutout_center_width = np.random.randint(min_x, max_x + 1, dtype=np.int32)
			
 
				+
			
 
				+    lower_pad = np.maximum(0, cutout_center_height - pad_size_height)
			
 
				+    upper_pad = np.maximum(
			
 
				+        0, image_height - cutout_center_height - pad_size_height)
			
 
				+    left_pad = np.maximum(0, cutout_center_width - pad_size_width)
			
 
				+    right_pad = np.maximum(0,
			
 
				+                           image_width - cutout_center_width - pad_size_width)
			
 
				+
			
 
				+    cutout_shape = [
			
 
				+        image_height - (lower_pad + upper_pad),
			
 
				+        image_width - (left_pad + right_pad)
			
 
				+    ]
			
 
				+    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
			
 
				+
			
 
				+    mask = np.pad(np.zeros(
			
 
				+        cutout_shape, dtype=image.dtype),
			
 
				+                  padding_dims,
			
 
				+                  'constant',
			
 
				+                  constant_values=1)
			
 
				+
			
 
				+    mask = np.expand_dims(mask, 2)
			
 
				+    mask = np.tile(mask, [1, 1, 3])
			
 
				+    return mask, mean
			
 
				+
			
 
				+
			
 
				+def bbox_cutout(image, bboxes, pad_fraction, replace_with_mean):
			
 
				+    """Applies cutout to the image according to bbox information.
			
 
				+
			
 
				+    This is a cutout variant that using bbox information to make more informed
			
 
				+    decisions on where to place the cutout mask.
			
 
				+
			
 
				+    Args:
			
 
				+        image: 3D uint8 Tensor.
			
 
				+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
			
 
				+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
			
 
				+            between [0, 1].
			
 
				+        pad_fraction: Float that specifies how large the cutout mask should be in
			
 
				+            in reference to the size of the original bbox. If pad_fraction is 0.25,
			
 
				+            then the cutout mask will be of shape
			
 
				+            (0.25 * bbox height, 0.25 * bbox width).
			
 
				+        replace_with_mean: Boolean that specified what value should be filled in
			
 
				+            where the cutout mask is applied. Since the incoming image will be of
			
 
				+            uint8 and will not have had any mean normalization applied, by default
			
 
				+            we set the value to be 128. If replace_with_mean is True then we find
			
 
				+            the mean pixel values across the channel dimension and use those to fill
			
 
				+            in where the cutout mask is applied.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple. First element is a tensor of the same shape as image that has
			
 
				+        cutout applied to it. Second element is the bboxes that were passed in
			
 
				+        that will be unchanged.
			
 
				+    """
			
 
				+
			
 
				+    def apply_bbox_cutout(image, bboxes, pad_fraction):
			
 
				+        """Applies cutout to a single bounding box within image."""
			
 
				+        # Choose a single bounding box to apply cutout to.
			
 
				+        random_index = np.random.randint(0, bboxes.shape[0], dtype=np.int32)
			
 
				+        # Select the corresponding bbox and apply cutout.
			
 
				+        chosen_bbox = np.take(bboxes, random_index, axis=0)
			
 
				+        mask, mean = _cutout_inside_bbox(image, chosen_bbox, pad_fraction)
			
 
				+
			
 
				+        # When applying cutout we either set the pixel value to 128 or to the mean
			
 
				+        # value inside the bbox.
			
 
				+        replace = mean if replace_with_mean else [128] * 3
			
 
				+
			
 
				+        # Apply the cutout mask to the image. Where the mask is 0 we fill it with
			
 
				+        # `replace`.
			
 
				+        image = np.where(
			
 
				+            np.equal(mask, 0),
			
 
				+            np.ones_like(
			
 
				+                image, dtype=image.dtype) * replace,
			
 
				+            image).astype(image.dtype)
			
 
				+        return image
			
 
				+
			
 
				+    # Check to see if there are boxes, if so then apply boxcutout.
			
 
				+    if len(bboxes) != 0:
			
 
				+        image = apply_bbox_cutout(image, bboxes, pad_fraction)
			
 
				+
			
 
				+    return image, bboxes
			
 
				+
			
 
				+
			
 
# Dispatch table from policy operation name to the callable implementing it.
# `_parse_policy_info` looks an operation up here by name and inspects the
# callable's argument names ('prob', 'replace', 'bboxes') to decide which
# extra arguments to pass.
NAME_TO_FUNC = {
        'AutoContrast': autocontrast,
        'Equalize': equalize,
        'Posterize': posterize,
        'Solarize': solarize,
        'SolarizeAdd': solarize_add,
        'Color': color,
        'Contrast': contrast,
        'Brightness': brightness,
        'Sharpness': sharpness,
        'Cutout': cutout,
        'BBox_Cutout': bbox_cutout,
        'Rotate_BBox': rotate_with_bboxes,
        # The translate/shear entries are thin lambdas that pin the direction
        # keyword (`shift_horizontal` / `shear_horizontal`) while keeping
        # `replace` as the last positional argument.
        # pylint:disable=g-long-lambda
        'TranslateX_BBox': lambda image, bboxes, pixels, replace: translate_bbox(
                image, bboxes, pixels, replace, shift_horizontal=True),
        'TranslateY_BBox': lambda image, bboxes, pixels, replace: translate_bbox(
                image, bboxes, pixels, replace, shift_horizontal=False),
        'ShearX_BBox': lambda image, bboxes, level, replace: shear_with_bboxes(
                image, bboxes, level, replace, shear_horizontal=True),
        'ShearY_BBox': lambda image, bboxes, level, replace: shear_with_bboxes(
                image, bboxes, level, replace, shear_horizontal=False),
        # pylint:enable=g-long-lambda
        'Rotate_Only_BBoxes': rotate_only_bboxes,
        'ShearX_Only_BBoxes': shear_x_only_bboxes,
        'ShearY_Only_BBoxes': shear_y_only_bboxes,
        'TranslateX_Only_BBoxes': translate_x_only_bboxes,
        'TranslateY_Only_BBoxes': translate_y_only_bboxes,
        'Flip_Only_BBoxes': flip_only_bboxes,
        'Solarize_Only_BBoxes': solarize_only_bboxes,
        'Equalize_Only_BBoxes': equalize_only_bboxes,
        'Cutout_Only_BBoxes': cutout_only_bboxes,
}
			
 
				+
			
 
				+
			
 
				+def _randomly_negate_tensor(tensor):
			
 
				+    """With 50% prob turn the tensor negative."""
			
 
				+    should_flip = np.floor(np.random.rand() + 0.5) >= 1
			
 
				+    final_tensor = tensor if should_flip else -tensor
			
 
				+    return final_tensor
			
 
				+
			
 
				+
			
 
				+def _rotate_level_to_arg(level):
			
 
				+    level = (level / _MAX_LEVEL) * 30.
			
 
				+    level = _randomly_negate_tensor(level)
			
 
				+    return (level, )
			
 
				+
			
 
				+
			
 
				+def _shrink_level_to_arg(level):
			
 
				+    """Converts level to ratio by which we shrink the image content."""
			
 
				+    if level == 0:
			
 
				+        return (1.0, )  # if level is zero, do not shrink the image
			
 
				+    # Maximum shrinking ratio is 2.9.
			
 
				+    level = 2. / (_MAX_LEVEL / level) + 0.9
			
 
				+    return (level, )
			
 
				+
			
 
				+
			
 
				+def _enhance_level_to_arg(level):
			
 
				+    return ((level / _MAX_LEVEL) * 1.8 + 0.1, )
			
 
				+
			
 
				+
			
 
				+def _shear_level_to_arg(level):
			
 
				+    level = (level / _MAX_LEVEL) * 0.3
			
 
				+    # Flip level to negative with 50% chance.
			
 
				+    level = _randomly_negate_tensor(level)
			
 
				+    return (level, )
			
 
				+
			
 
				+
			
 
				+def _translate_level_to_arg(level, translate_const):
			
 
				+    level = (level / _MAX_LEVEL) * float(translate_const)
			
 
				+    # Flip level to negative with 50% chance.
			
 
				+    level = _randomly_negate_tensor(level)
			
 
				+    return (level, )
			
 
				+
			
 
				+
			
 
				+def _bbox_cutout_level_to_arg(level, hparams):
			
 
				+    cutout_pad_fraction = (
			
 
				+        level / _MAX_LEVEL) * 0.75  # hparams.cutout_max_pad_fraction
			
 
				+    return (cutout_pad_fraction,
			
 
				+            False)  # hparams.cutout_bbox_replace_with_mean
			
 
				+
			
 
				+
			
 
				+def level_to_arg(hparams):
			
 
				+    return {
			
 
				+        'AutoContrast': lambda level: (),
			
 
				+        'Equalize': lambda level: (),
			
 
				+        'Posterize': lambda level: (int((level / _MAX_LEVEL) * 4), ),
			
 
				+        'Solarize': lambda level: (int((level / _MAX_LEVEL) * 256), ),
			
 
				+        'SolarizeAdd': lambda level: (int((level / _MAX_LEVEL) * 110), ),
			
 
				+        'Color': _enhance_level_to_arg,
			
 
				+        'Contrast': _enhance_level_to_arg,
			
 
				+        'Brightness': _enhance_level_to_arg,
			
 
				+        'Sharpness': _enhance_level_to_arg,
			
 
				+        'Cutout':
			
 
				+        lambda level: (int((level / _MAX_LEVEL) * 100), ),  # hparams.cutout_const=100
			
 
				+        # pylint:disable=g-long-lambda
			
 
				+        'BBox_Cutout': lambda level: _bbox_cutout_level_to_arg(level, hparams),
			
 
				+        'TranslateX_BBox':
			
 
				+        lambda level: _translate_level_to_arg(level, 250),  # hparams.translate_const=250
			
 
				+        'TranslateY_BBox':
			
 
				+        lambda level: _translate_level_to_arg(level, 250),  # hparams.translate_cons
			
 
				+        # pylint:enable=g-long-lambda
			
 
				+        'ShearX_BBox': _shear_level_to_arg,
			
 
				+        'ShearY_BBox': _shear_level_to_arg,
			
 
				+        'Rotate_BBox': _rotate_level_to_arg,
			
 
				+        'Rotate_Only_BBoxes': _rotate_level_to_arg,
			
 
				+        'ShearX_Only_BBoxes': _shear_level_to_arg,
			
 
				+        'ShearY_Only_BBoxes': _shear_level_to_arg,
			
 
				+        # pylint:disable=g-long-lambda
			
 
				+        'TranslateX_Only_BBoxes':
			
 
				+        lambda level: _translate_level_to_arg(level, 120),  # hparams.translate_bbox_const
			
 
				+        'TranslateY_Only_BBoxes':
			
 
				+        lambda level: _translate_level_to_arg(level, 120),  # hparams.translate_bbox_const
			
 
				+        # pylint:enable=g-long-lambda
			
 
				+        'Flip_Only_BBoxes': lambda level: (),
			
 
				+        'Solarize_Only_BBoxes':
			
 
				+        lambda level: (int((level / _MAX_LEVEL) * 256), ),
			
 
				+        'Equalize_Only_BBoxes': lambda level: (),
			
 
				+        # pylint:disable=g-long-lambda
			
 
				+        'Cutout_Only_BBoxes':
			
 
				+        lambda level: (int((level / _MAX_LEVEL) * 50), ),  # hparams.cutout_bbox_const
			
 
				+        # pylint:enable=g-long-lambda
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def bbox_wrapper(func):
			
 
				+    """Adds a bboxes function argument to func and returns unchanged bboxes."""
			
 
				+
			
 
				+    def wrapper(images, bboxes, *args, **kwargs):
			
 
				+        return (func(images, *args, **kwargs), bboxes)
			
 
				+
			
 
				+    return wrapper
			
 
				+
			
 
				+
			
 
				+def _parse_policy_info(name, prob, level, replace_value, augmentation_hparams):
			
 
				+    """Return the function that corresponds to `name` and update `level` param."""
			
 
				+    func = NAME_TO_FUNC[name]
			
 
				+    args = level_to_arg(augmentation_hparams)[name](level)
			
 
				+
			
 
				+    # Check to see if prob is passed into function. This is used for operations
			
 
				+    # where we alter bboxes independently.
			
 
				+    # pytype:disable=wrong-arg-types
			
 
				+    if 'prob' in inspect.getfullargspec(func)[0]:
			
 
				+        args = tuple([prob] + list(args))
			
 
				+    # pytype:enable=wrong-arg-types
			
 
				+
			
 
				+    # Add in replace arg if it is required for the function that is being called.
			
 
				+    if 'replace' in inspect.getfullargspec(func)[0]:
			
 
				+        # Make sure replace is the final argument
			
 
				+        assert 'replace' == inspect.getfullargspec(func)[0][-1]
			
 
				+        args = tuple(list(args) + [replace_value])
			
 
				+
			
 
				+    # Add bboxes as the second positional argument for the function if it does
			
 
				+    # not already exist.
			
 
				+    if 'bboxes' not in inspect.getfullargspec(func)[0]:
			
 
				+        func = bbox_wrapper(func)
			
 
				+    return (func, prob, args)
			
 
				+
			
 
				+
			
 
				+def _apply_func_with_prob(func, image, args, prob, bboxes):
			
 
				+    """Apply `func` to image w/ `args` as input with probability `prob`."""
			
 
				+    assert isinstance(args, tuple)
			
 
				+    assert 'bboxes' == inspect.getfullargspec(func)[0][1]
			
 
				+
			
 
				+    # If prob is a function argument, then this randomness is being handled
			
 
				+    # inside the function, so make sure it is always called.
			
 
				+    if 'prob' in inspect.getfullargspec(func)[0]:
			
 
				+        prob = 1.0
			
 
				+
			
 
				+    # Apply the function with probability `prob`.
			
 
				+    should_apply_op = np.floor(np.random.rand() + 0.5) >= 1
			
 
				+    if should_apply_op:
			
 
				+        augmented_image, augmented_bboxes = func(image, bboxes, *args)
			
 
				+    else:
			
 
				+        augmented_image, augmented_bboxes = (image, bboxes)
			
 
				+    return augmented_image, augmented_bboxes
			
 
				+
			
 
				+
			
 
				+def select_and_apply_random_policy(policies, image, bboxes):
			
 
				+    """Select a random policy from `policies` and apply it to `image`."""
			
 
				+    policy_to_select = np.random.randint(0, len(policies), dtype=np.int32)
			
 
				+    # policy_to_select = 6 # for test
			
 
				+    for (i, policy) in enumerate(policies):
			
 
				+        if i == policy_to_select:
			
 
				+            image, bboxes = policy(image, bboxes)
			
 
				+    return (image, bboxes)
			
 
				+
			
 
				+
			
 
				+def build_and_apply_nas_policy(policies, image, bboxes, augmentation_hparams):
			
 
				+    """Build a policy from the given policies passed in and apply to image.
			
 
				+
			
 
				+    Args:
			
 
				+        policies: list of lists of tuples in the form `(func, prob, level)`, `func`
			
 
				+            is a string name of the augmentation function, `prob` is the probability
			
 
				+            of applying the `func` operation, `level` is the input argument for
			
 
				+            `func`.
			
 
				+        image: numpy array that the resulting policy will be applied to.
			
 
				+        bboxes:
			
 
				+        augmentation_hparams: Hparams associated with the NAS learned policy.
			
 
				+
			
 
				+    Returns:
			
 
				+        A version of image that now has data augmentation applied to it based on
			
 
				+        the `policies` pass into the function. Additionally, returns bboxes if
			
 
				+        a value for them is passed in that is not None
			
 
				+    """
			
 
				+    replace_value = [128, 128, 128]
			
 
				+
			
 
				+    # func is the string name of the augmentation function, prob is the
			
 
				+    # probability of applying the operation and level is the parameter associated
			
 
				+
			
 
				+    # tf_policies are functions that take in an image and return an augmented
			
 
				+    # image.
			
 
				+    tf_policies = []
			
 
				+    for policy in policies:
			
 
				+        tf_policy = []
			
 
				+        # Link string name to the correct python function and make sure the correct
			
 
				+        # argument is passed into that function.
			
 
				+        for policy_info in policy:
			
 
				+            policy_info = list(
			
 
				+                policy_info) + [replace_value, augmentation_hparams]
			
 
				+
			
 
				+            tf_policy.append(_parse_policy_info(*policy_info))
			
 
				+        # Now build the tf policy that will apply the augmentation procedue
			
 
				+        # on image.
			
 
				+        def make_final_policy(tf_policy_):
			
 
				+            def final_policy(image_, bboxes_):
			
 
				+                for func, prob, args in tf_policy_:
			
 
				+                    image_, bboxes_ = _apply_func_with_prob(func, image_, args,
			
 
				+                                                            prob, bboxes_)
			
 
				+                return image_, bboxes_
			
 
				+
			
 
				+            return final_policy
			
 
				+
			
 
				+        tf_policies.append(make_final_policy(tf_policy))
			
 
				+
			
 
				+    augmented_images, augmented_bboxes = select_and_apply_random_policy(
			
 
				+        tf_policies, image, bboxes)
			
 
				+    # If no bounding boxes were specified, then just return the images.
			
 
				+    return (augmented_images, augmented_bboxes)
			
 
				+
			
 
				+
			
 
				+# TODO(barretzoph): Add in ArXiv link once paper is out.
			
 
				+def distort_image_with_autoaugment(image, bboxes, augmentation_name):
			
 
				+    """Applies the AutoAugment policy to `image` and `bboxes`.
			
 
				+
			
 
				+    Args:
			
 
				+        image: `Tensor` of shape [height, width, 3] representing an image.
			
 
				+        bboxes: `Tensor` of shape [N, 4] representing ground truth boxes that are
			
 
				+            normalized between [0, 1].
			
 
				+        augmentation_name: The name of the AutoAugment policy to use. The available
			
 
				+            options are `v0`, `v1`, `v2`, `v3` and `test`. `v0` is the policy used for
			
 
				+            all of the results in the paper and was found to achieve the best results
			
 
				+            on the COCO dataset. `v1`, `v2` and `v3` are additional good policies
			
 
				+            found on the COCO dataset that have slight variation in what operations
			
 
				+            were used during the search procedure along with how many operations are
			
 
				+            applied in parallel to a single image (2 vs 3).
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple containing the augmented versions of `image` and `bboxes`.
			
 
				+    """
			
 
				+    available_policies = {
			
 
				+        'v0': policy_v0,
			
 
				+        'v1': policy_v1,
			
 
				+        'v2': policy_v2,
			
 
				+        'v3': policy_v3,
			
 
				+        'test': policy_vtest
			
 
				+    }
			
 
				+    if augmentation_name not in available_policies:
			
 
				+        raise ValueError('Invalid augmentation_name: {}'.format(
			
 
				+            augmentation_name))
			
 
				+
			
 
				+    policy = available_policies[augmentation_name]()
			
 
				+    augmentation_hparams = {}
			
 
				+    return build_and_apply_nas_policy(policy, image, bboxes,
			
 
				+                                      augmentation_hparams)
			
--- a/paddlers/models/ppdet/data/transform/batch_operators.py
+++ b/paddlers/models/ppdet/data/transform/batch_operators.py
@@ -0,0 +1,1080 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import typing
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+import cv2
			
 
				+import math
			
 
				+import numpy as np
			
 
				+from .operators import register_op, BaseOperator, Resize
			
 
				+from .op_helper import jaccard_overlap, gaussian2D, gaussian_radius, draw_umich_gaussian
			
 
				+from .atss_assigner import ATSSAssigner
			
 
				+from scipy import ndimage
			
 
				+
			
 
				+from paddlers.models.ppdet.modeling import bbox_utils
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+from paddlers.models.ppdet.modeling.keypoint_utils import get_affine_transform, affine_transform
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = [
			
 
				+    'PadBatch',
			
 
				+    'BatchRandomResize',
			
 
				+    'Gt2YoloTarget',
			
 
				+    'Gt2FCOSTarget',
			
 
				+    'Gt2TTFTarget',
			
 
				+    'Gt2Solov2Target',
			
 
				+    'Gt2SparseRCNNTarget',
			
 
				+    'PadMaskBatch',
			
 
				+    'Gt2GFLTarget',
			
 
				+    'Gt2CenterNetTarget',
			
 
				+]
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadBatch(BaseOperator):
			
 
				+    """
			
 
				+    Pad a batch of samples so they can be divisible by a stride.
			
 
				+    The layout of each image should be 'CHW'.
			
 
				+    Args:
			
 
				+        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
			
 
				+            height and width is divisible by `pad_to_stride`.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, pad_to_stride=0):
			
 
				+        super(PadBatch, self).__init__()
			
 
				+        self.pad_to_stride = pad_to_stride
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        """
			
 
				+        Args:
			
 
				+            samples (list): a batch of sample, each is dict.
			
 
				+        """
			
 
				+        coarsest_stride = self.pad_to_stride
			
 
				+
			
 
				+        # multi scale input is nested list
			
 
				+        if isinstance(samples,
			
 
				+                      typing.Sequence) and len(samples) > 0 and isinstance(
			
 
				+                          samples[0], typing.Sequence):
			
 
				+            inner_samples = samples[0]
			
 
				+        else:
			
 
				+            inner_samples = samples
			
 
				+
			
 
				+        max_shape = np.array(
			
 
				+            [data['image'].shape for data in inner_samples]).max(axis=0)
			
 
				+        if coarsest_stride > 0:
			
 
				+            max_shape[1] = int(
			
 
				+                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
			
 
				+            max_shape[2] = int(
			
 
				+                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
			
 
				+
			
 
				+        for data in inner_samples:
			
 
				+            im = data['image']
			
 
				+            im_c, im_h, im_w = im.shape[:]
			
 
				+            padding_im = np.zeros(
			
 
				+                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
			
 
				+            padding_im[:, :im_h, :im_w] = im
			
 
				+            data['image'] = padding_im
			
 
				+            if 'semantic' in data and data['semantic'] is not None:
			
 
				+                semantic = data['semantic']
			
 
				+                padding_sem = np.zeros(
			
 
				+                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
			
 
				+                padding_sem[:, :im_h, :im_w] = semantic
			
 
				+                data['semantic'] = padding_sem
			
 
				+            if 'gt_segm' in data and data['gt_segm'] is not None:
			
 
				+                gt_segm = data['gt_segm']
			
 
				+                padding_segm = np.zeros(
			
 
				+                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
			
 
				+                    dtype=np.uint8)
			
 
				+                padding_segm[:, :im_h, :im_w] = gt_segm
			
 
				+                data['gt_segm'] = padding_segm
			
 
				+
			
 
				+            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
			
 
				+                # ploy to rbox
			
 
				+                polys = data['gt_rbox2poly']
			
 
				+                rbox = bbox_utils.poly2rbox(polys)
			
 
				+                data['gt_rbox'] = rbox
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class BatchRandomResize(BaseOperator):
			
 
				+    """
			
 
				+    Resize image to target size randomly. random target_size and interpolation method
			
 
				+    Args:
			
 
				+        target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
			
 
				+        keep_ratio (bool): whether keep_raio or not, default true
			
 
				+        interp (int): the interpolation method
			
 
				+        random_size (bool): whether random select target size of image
			
 
				+        random_interp (bool): whether random select interpolation method
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 target_size,
			
 
				+                 keep_ratio,
			
 
				+                 interp=cv2.INTER_NEAREST,
			
 
				+                 random_size=True,
			
 
				+                 random_interp=False):
			
 
				+        super(BatchRandomResize, self).__init__()
			
 
				+        self.keep_ratio = keep_ratio
			
 
				+        self.interps = [
			
 
				+            cv2.INTER_NEAREST,
			
 
				+            cv2.INTER_LINEAR,
			
 
				+            cv2.INTER_AREA,
			
 
				+            cv2.INTER_CUBIC,
			
 
				+            cv2.INTER_LANCZOS4,
			
 
				+        ]
			
 
				+        self.interp = interp
			
 
				+        assert isinstance(target_size, (
			
 
				+            int, Sequence)), "target_size must be int, list or tuple"
			
 
				+        if random_size and not isinstance(target_size, list):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid when random_size is True. Must be List, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        self.target_size = target_size
			
 
				+        self.random_size = random_size
			
 
				+        self.random_interp = random_interp
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        if self.random_size:
			
 
				+            index = np.random.choice(len(self.target_size))
			
 
				+            target_size = self.target_size[index]
			
 
				+        else:
			
 
				+            target_size = self.target_size
			
 
				+
			
 
				+        if self.random_interp:
			
 
				+            interp = np.random.choice(self.interps)
			
 
				+        else:
			
 
				+            interp = self.interp
			
 
				+
			
 
				+        resizer = Resize(
			
 
				+            target_size, keep_ratio=self.keep_ratio, interp=interp)
			
 
				+        return resizer(samples, context=context)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2YoloTarget(BaseOperator):
			
 
				+    """
			
 
				+    Generate YOLOv3 targets by groud truth data, this operator is only used in
			
 
				+    fine grained YOLOv3 loss mode
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 anchors,
			
 
				+                 anchor_masks,
			
 
				+                 downsample_ratios,
			
 
				+                 num_classes=80,
			
 
				+                 iou_thresh=1.):
			
 
				+        super(Gt2YoloTarget, self).__init__()
			
 
				+        self.anchors = anchors
			
 
				+        self.anchor_masks = anchor_masks
			
 
				+        self.downsample_ratios = downsample_ratios
			
 
				+        self.num_classes = num_classes
			
 
				+        self.iou_thresh = iou_thresh
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        assert len(self.anchor_masks) == len(self.downsample_ratios), \
			
 
				+            "anchor_masks', and 'downsample_ratios' should have same length."
			
 
				+
			
 
				+        h, w = samples[0]['image'].shape[1:3]
			
 
				+        an_hw = np.array(self.anchors) / np.array([[w, h]])
			
 
				+        for sample in samples:
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            gt_class = sample['gt_class']
			
 
				+            if 'gt_score' not in sample:
			
 
				+                sample['gt_score'] = np.ones(
			
 
				+                    (gt_bbox.shape[0], 1), dtype=np.float32)
			
 
				+            gt_score = sample['gt_score']
			
 
				+            for i, (
			
 
				+                    mask, downsample_ratio
			
 
				+            ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
			
 
				+                grid_h = int(h / downsample_ratio)
			
 
				+                grid_w = int(w / downsample_ratio)
			
 
				+                target = np.zeros(
			
 
				+                    (len(mask), 6 + self.num_classes, grid_h, grid_w),
			
 
				+                    dtype=np.float32)
			
 
				+                for b in range(gt_bbox.shape[0]):
			
 
				+                    gx, gy, gw, gh = gt_bbox[b, :]
			
 
				+                    cls = gt_class[b]
			
 
				+                    score = gt_score[b]
			
 
				+                    if gw <= 0. or gh <= 0. or score <= 0.:
			
 
				+                        continue
			
 
				+
			
 
				+                    # find best match anchor index
			
 
				+                    best_iou = 0.
			
 
				+                    best_idx = -1
			
 
				+                    for an_idx in range(an_hw.shape[0]):
			
 
				+                        iou = jaccard_overlap(
			
 
				+                            [0., 0., gw, gh],
			
 
				+                            [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
			
 
				+                        if iou > best_iou:
			
 
				+                            best_iou = iou
			
 
				+                            best_idx = an_idx
			
 
				+
			
 
				+                    gi = int(gx * grid_w)
			
 
				+                    gj = int(gy * grid_h)
			
 
				+
			
 
				+                    # gtbox should be regresed in this layes if best match
			
 
				+                    # anchor index in anchor mask of this layer
			
 
				+                    if best_idx in mask:
			
 
				+                        best_n = mask.index(best_idx)
			
 
				+
			
 
				+                        # x, y, w, h, scale
			
 
				+                        target[best_n, 0, gj, gi] = gx * grid_w - gi
			
 
				+                        target[best_n, 1, gj, gi] = gy * grid_h - gj
			
 
				+                        target[best_n, 2, gj, gi] = np.log(
			
 
				+                            gw * w / self.anchors[best_idx][0])
			
 
				+                        target[best_n, 3, gj, gi] = np.log(
			
 
				+                            gh * h / self.anchors[best_idx][1])
			
 
				+                        target[best_n, 4, gj, gi] = 2.0 - gw * gh
			
 
				+
			
 
				+                        # objectness record gt_score
			
 
				+                        target[best_n, 5, gj, gi] = score
			
 
				+
			
 
				+                        # classification
			
 
				+                        target[best_n, 6 + cls, gj, gi] = 1.
			
 
				+
			
 
				+                    # For non-matched anchors, calculate the target if the iou
			
 
				+                    # between anchor and gt is larger than iou_thresh
			
 
				+                    if self.iou_thresh < 1:
			
 
				+                        for idx, mask_i in enumerate(mask):
			
 
				+                            if mask_i == best_idx: continue
			
 
				+                            iou = jaccard_overlap(
			
 
				+                                [0., 0., gw, gh],
			
 
				+                                [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
			
 
				+                            if iou > self.iou_thresh and target[idx, 5, gj,
			
 
				+                                                                gi] == 0.:
			
 
				+                                # x, y, w, h, scale
			
 
				+                                target[idx, 0, gj, gi] = gx * grid_w - gi
			
 
				+                                target[idx, 1, gj, gi] = gy * grid_h - gj
			
 
				+                                target[idx, 2, gj, gi] = np.log(
			
 
				+                                    gw * w / self.anchors[mask_i][0])
			
 
				+                                target[idx, 3, gj, gi] = np.log(
			
 
				+                                    gh * h / self.anchors[mask_i][1])
			
 
				+                                target[idx, 4, gj, gi] = 2.0 - gw * gh
			
 
				+
			
 
				+                                # objectness record gt_score
			
 
				+                                target[idx, 5, gj, gi] = score
			
 
				+
			
 
				+                                # classification
			
 
				+                                target[idx, 6 + cls, gj, gi] = 1.
			
 
				+                sample['target{}'.format(i)] = target
			
 
				+
			
 
				+            # remove useless gt_class and gt_score after target calculated
			
 
				+            sample.pop('gt_class')
			
 
				+            sample.pop('gt_score')
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2FCOSTarget(BaseOperator):
			
 
				+    """
			
 
				+    Generate FCOS targets by groud truth data
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 object_sizes_boundary,
			
 
				+                 center_sampling_radius,
			
 
				+                 downsample_ratios,
			
 
				+                 norm_reg_targets=False):
			
 
				+        super(Gt2FCOSTarget, self).__init__()
			
 
				+        self.center_sampling_radius = center_sampling_radius
			
 
				+        self.downsample_ratios = downsample_ratios
			
 
				+        self.INF = np.inf
			
 
				+        self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
			
 
				+        object_sizes_of_interest = []
			
 
				+        for i in range(len(self.object_sizes_boundary) - 1):
			
 
				+            object_sizes_of_interest.append([
			
 
				+                self.object_sizes_boundary[i],
			
 
				+                self.object_sizes_boundary[i + 1]
			
 
				+            ])
			
 
				+        self.object_sizes_of_interest = object_sizes_of_interest
			
 
				+        self.norm_reg_targets = norm_reg_targets
			
 
				+
			
 
				+    def _compute_points(self, w, h):
			
 
				+        """
			
 
				+        compute the corresponding points in each feature map
			
 
				+        :param h: image height
			
 
				+        :param w: image width
			
 
				+        :return: points from all feature map
			
 
				+        """
			
 
				+        locations = []
			
 
				+        for stride in self.downsample_ratios:
			
 
				+            shift_x = np.arange(0, w, stride).astype(np.float32)
			
 
				+            shift_y = np.arange(0, h, stride).astype(np.float32)
			
 
				+            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
			
 
				+            shift_x = shift_x.flatten()
			
 
				+            shift_y = shift_y.flatten()
			
 
				+            location = np.stack([shift_x, shift_y], axis=1) + stride // 2
			
 
				+            locations.append(location)
			
 
				+        num_points_each_level = [len(location) for location in locations]
			
 
				+        locations = np.concatenate(locations, axis=0)
			
 
				+        return locations, num_points_each_level
			
 
				+
			
 
				+    def _convert_xywh2xyxy(self, gt_bbox, w, h):
			
 
				+        """
			
 
				+        convert the bounding box from style xywh to xyxy
			
 
				+        :param gt_bbox: bounding boxes normalized into [0, 1]
			
 
				+        :param w: image width
			
 
				+        :param h: image height
			
 
				+        :return: bounding boxes in xyxy style
			
 
				+        """
			
 
				+        bboxes = gt_bbox.copy()
			
 
				+        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
			
 
				+        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
			
 
				+        bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
			
 
				+        bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
			
 
				+        return bboxes
			
 
				+
			
 
				+    def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
			
 
				+                                    num_points_each_level):
			
 
				+        """
			
 
				+        check if points is within the clipped boxes
			
 
				+        :param gt_bbox: bounding boxes
			
 
				+        :param xs: horizontal coordinate of points
			
 
				+        :param ys: vertical coordinate of points
			
 
				+        :return: the mask of points is within gt_box or not
			
 
				+        """
			
 
				+        bboxes = np.reshape(
			
 
				+            gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
			
 
				+        bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
			
 
				+        ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
			
 
				+        ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
			
 
				+        beg = 0
			
 
				+        clipped_box = bboxes.copy()
			
 
				+        for lvl, stride in enumerate(self.downsample_ratios):
			
 
				+            end = beg + num_points_each_level[lvl]
			
 
				+            stride_exp = self.center_sampling_radius * stride
			
 
				+            clipped_box[beg:end, :, 0] = np.maximum(
			
 
				+                bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
			
 
				+            clipped_box[beg:end, :, 1] = np.maximum(
			
 
				+                bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
			
 
				+            clipped_box[beg:end, :, 2] = np.minimum(
			
 
				+                bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
			
 
				+            clipped_box[beg:end, :, 3] = np.minimum(
			
 
				+                bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
			
 
				+            beg = end
			
 
				+        l_res = xs - clipped_box[:, :, 0]
			
 
				+        r_res = clipped_box[:, :, 2] - xs
			
 
				+        t_res = ys - clipped_box[:, :, 1]
			
 
				+        b_res = clipped_box[:, :, 3] - ys
			
 
				+        clipped_box_reg_targets = np.stack(
			
 
				+            [l_res, t_res, r_res, b_res], axis=2)
			
 
				+        inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
			
 
				+        return inside_gt_box
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
			
 
				+            "object_sizes_of_interest', and 'downsample_ratios' should have same length."
			
 
				+
			
 
				+        for sample in samples:
			
 
				+            im = sample['image']
			
 
				+            bboxes = sample['gt_bbox']
			
 
				+            gt_class = sample['gt_class']
			
 
				+            # calculate the locations
			
 
				+            h, w = im.shape[1:3]
			
 
				+            points, num_points_each_level = self._compute_points(w, h)
			
 
				+            object_scale_exp = []
			
 
				+            for i, num_pts in enumerate(num_points_each_level):
			
 
				+                object_scale_exp.append(
			
 
				+                    np.tile(
			
 
				+                        np.array([self.object_sizes_of_interest[i]]),
			
 
				+                        reps=[num_pts, 1]))
			
 
				+            object_scale_exp = np.concatenate(object_scale_exp, axis=0)
			
 
				+
			
 
				+            gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
			
 
				+                bboxes[:, 3] - bboxes[:, 1])
			
 
				+            xs, ys = points[:, 0], points[:, 1]
			
 
				+            xs = np.reshape(xs, newshape=[xs.shape[0], 1])
			
 
				+            xs = np.tile(xs, reps=[1, bboxes.shape[0]])
			
 
				+            ys = np.reshape(ys, newshape=[ys.shape[0], 1])
			
 
				+            ys = np.tile(ys, reps=[1, bboxes.shape[0]])
			
 
				+
			
 
				+            l_res = xs - bboxes[:, 0]
			
 
				+            r_res = bboxes[:, 2] - xs
			
 
				+            t_res = ys - bboxes[:, 1]
			
 
				+            b_res = bboxes[:, 3] - ys
			
 
				+            reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
			
 
				+            if self.center_sampling_radius > 0:
			
 
				+                is_inside_box = self._check_inside_boxes_limited(
			
 
				+                    bboxes, xs, ys, num_points_each_level)
			
 
				+            else:
			
 
				+                is_inside_box = np.min(reg_targets, axis=2) > 0
			
 
				+            # check if the targets is inside the corresponding level
			
 
				+            max_reg_targets = np.max(reg_targets, axis=2)
			
 
				+            lower_bound = np.tile(
			
 
				+                np.expand_dims(
			
 
				+                    object_scale_exp[:, 0], axis=1),
			
 
				+                reps=[1, max_reg_targets.shape[1]])
			
 
				+            high_bound = np.tile(
			
 
				+                np.expand_dims(
			
 
				+                    object_scale_exp[:, 1], axis=1),
			
 
				+                reps=[1, max_reg_targets.shape[1]])
			
 
				+            is_match_current_level = \
			
 
				+                (max_reg_targets > lower_bound) & \
			
 
				+                (max_reg_targets < high_bound)
			
 
				+            points2gtarea = np.tile(
			
 
				+                np.expand_dims(
			
 
				+                    gt_area, axis=0), reps=[xs.shape[0], 1])
			
 
				+            points2gtarea[is_inside_box == 0] = self.INF
			
 
				+            points2gtarea[is_match_current_level == 0] = self.INF
			
 
				+            points2min_area = points2gtarea.min(axis=1)
			
 
				+            points2min_area_ind = points2gtarea.argmin(axis=1)
			
 
				+            labels = gt_class[points2min_area_ind] + 1
			
 
				+            labels[points2min_area == self.INF] = 0
			
 
				+            reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
			
 
				+            ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
			
 
				+                                  reg_targets[:, [0, 2]].max(axis=1)) * \
			
 
				+                                  (reg_targets[:, [1, 3]].min(axis=1) / \
			
 
				+                                   reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
			
 
				+            ctn_targets = np.reshape(
			
 
				+                ctn_targets, newshape=[ctn_targets.shape[0], 1])
			
 
				+            ctn_targets[labels <= 0] = 0
			
 
				+            pos_ind = np.nonzero(labels != 0)
			
 
				+            reg_targets_pos = reg_targets[pos_ind[0], :]
			
 
				+            split_sections = []
			
 
				+            beg = 0
			
 
				+            for lvl in range(len(num_points_each_level)):
			
 
				+                end = beg + num_points_each_level[lvl]
			
 
				+                split_sections.append(end)
			
 
				+                beg = end
			
 
				+            labels_by_level = np.split(labels, split_sections, axis=0)
			
 
				+            reg_targets_by_level = np.split(
			
 
				+                reg_targets, split_sections, axis=0)
			
 
				+            ctn_targets_by_level = np.split(
			
 
				+                ctn_targets, split_sections, axis=0)
			
 
				+            for lvl in range(len(self.downsample_ratios)):
			
 
				+                grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
			
 
				+                grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
			
 
				+                if self.norm_reg_targets:
			
 
				+                    sample['reg_target{}'.format(lvl)] = \
			
 
				+                        np.reshape(
			
 
				+                            reg_targets_by_level[lvl] / \
			
 
				+                            self.downsample_ratios[lvl],
			
 
				+                            newshape=[grid_h, grid_w, 4])
			
 
				+                else:
			
 
				+                    sample['reg_target{}'.format(lvl)] = np.reshape(
			
 
				+                        reg_targets_by_level[lvl],
			
 
				+                        newshape=[grid_h, grid_w, 4])
			
 
				+                sample['labels{}'.format(lvl)] = np.reshape(
			
 
				+                    labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
			
 
				+                sample['centerness{}'.format(lvl)] = np.reshape(
			
 
				+                    ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
			
 
				+
			
 
				+            sample.pop('is_crowd', None)
			
 
				+            sample.pop('difficult', None)
			
 
				+            sample.pop('gt_class', None)
			
 
				+            sample.pop('gt_bbox', None)
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2GFLTarget(BaseOperator):
			
 
				+    """
			
 
				+    Generate GFocal loss targets by groud truth data
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 num_classes=80,
			
 
				+                 downsample_ratios=[8, 16, 32, 64, 128],
			
 
				+                 grid_cell_scale=4,
			
 
				+                 cell_offset=0):
			
 
				+        super(Gt2GFLTarget, self).__init__()
			
 
				+        self.num_classes = num_classes
			
 
				+        self.downsample_ratios = downsample_ratios
			
 
				+        self.grid_cell_scale = grid_cell_scale
			
 
				+        self.cell_offset = cell_offset
			
 
				+
			
 
				+        self.assigner = ATSSAssigner()
			
 
				+
			
 
				+    def get_grid_cells(self, featmap_size, scale, stride, offset=0):
			
 
				+        """
			
 
				+        Generate grid cells of a feature map for target assignment.
			
 
				+        Args:
			
 
				+            featmap_size: Size of a single level feature map.
			
 
				+            scale: Grid cell scale.
			
 
				+            stride: Down sample stride of the feature map.
			
 
				+            offset: Offset of grid cells.
			
 
				+        return:
			
 
				+            Grid_cells xyxy position. Size should be [feat_w * feat_h, 4]
			
 
				+        """
			
 
				+        cell_size = stride * scale
			
 
				+        h, w = featmap_size
			
 
				+        x_range = (np.arange(w, dtype=np.float32) + offset) * stride
			
 
				+        y_range = (np.arange(h, dtype=np.float32) + offset) * stride
			
 
				+        x, y = np.meshgrid(x_range, y_range)
			
 
				+        y = y.flatten()
			
 
				+        x = x.flatten()
			
 
				+        grid_cells = np.stack(
			
 
				+            [
			
 
				+                x - 0.5 * cell_size, y - 0.5 * cell_size, x + 0.5 * cell_size,
			
 
				+                y + 0.5 * cell_size
			
 
				+            ],
			
 
				+            axis=-1)
			
 
				+        return grid_cells
			
 
				+
			
 
				+    def get_sample(self, assign_gt_inds, gt_bboxes):
			
 
				+        pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
			
 
				+        neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
			
 
				+        pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
			
 
				+
			
 
				+        if gt_bboxes.size == 0:
			
 
				+            # hack for index error case
			
 
				+            assert pos_assigned_gt_inds.size == 0
			
 
				+            pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
			
 
				+        else:
			
 
				+            if len(gt_bboxes.shape) < 2:
			
 
				+                gt_bboxes = gt_bboxes.resize(-1, 4)
			
 
				+            pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
			
 
				+        return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        assert len(samples) > 0
			
 
				+        batch_size = len(samples)
			
 
				+        # get grid cells of image
			
 
				+        h, w = samples[0]['image'].shape[1:3]
			
 
				+        multi_level_grid_cells = []
			
 
				+        for stride in self.downsample_ratios:
			
 
				+            featmap_size = (int(math.ceil(h / stride)),
			
 
				+                            int(math.ceil(w / stride)))
			
 
				+            multi_level_grid_cells.append(
			
 
				+                self.get_grid_cells(featmap_size, self.grid_cell_scale, stride,
			
 
				+                                    self.cell_offset))
			
 
				+        mlvl_grid_cells_list = [
			
 
				+            multi_level_grid_cells for i in range(batch_size)
			
 
				+        ]
			
 
				+        # pixel cell number of multi-level feature maps
			
 
				+        num_level_cells = [
			
 
				+            grid_cells.shape[0] for grid_cells in mlvl_grid_cells_list[0]
			
 
				+        ]
			
 
				+        num_level_cells_list = [num_level_cells] * batch_size
			
 
				+        # concat all level cells and to a single array
			
 
				+        for i in range(batch_size):
			
 
				+            mlvl_grid_cells_list[i] = np.concatenate(mlvl_grid_cells_list[i])
			
 
				+        # target assign on all images
			
 
				+        for sample, grid_cells, num_level_cells in zip(
			
 
				+                samples, mlvl_grid_cells_list, num_level_cells_list):
			
 
				+            gt_bboxes = sample['gt_bbox']
			
 
				+            gt_labels = sample['gt_class'].squeeze()
			
 
				+            if gt_labels.size == 1:
			
 
				+                gt_labels = np.array([gt_labels]).astype(np.int32)
			
 
				+            gt_bboxes_ignore = None
			
 
				+            assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
			
 
				+                                              gt_bboxes, gt_bboxes_ignore,
			
 
				+                                              gt_labels)
			
 
				+            pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
			
 
				+                assign_gt_inds, gt_bboxes)
			
 
				+
			
 
				+            num_cells = grid_cells.shape[0]
			
 
				+            bbox_targets = np.zeros_like(grid_cells)
			
 
				+            bbox_weights = np.zeros_like(grid_cells)
			
 
				+            labels = np.ones([num_cells], dtype=np.int64) * self.num_classes
			
 
				+            label_weights = np.zeros([num_cells], dtype=np.float32)
			
 
				+
			
 
				+            if len(pos_inds) > 0:
			
 
				+                pos_bbox_targets = pos_gt_bboxes
			
 
				+                bbox_targets[pos_inds, :] = pos_bbox_targets
			
 
				+                bbox_weights[pos_inds, :] = 1.0
			
 
				+                if not np.any(gt_labels):
			
 
				+                    labels[pos_inds] = 0
			
 
				+                else:
			
 
				+                    labels[pos_inds] = gt_labels[pos_assigned_gt_inds]
			
 
				+
			
 
				+                label_weights[pos_inds] = 1.0
			
 
				+            if len(neg_inds) > 0:
			
 
				+                label_weights[neg_inds] = 1.0
			
 
				+            sample['grid_cells'] = grid_cells
			
 
				+            sample['labels'] = labels
			
 
				+            sample['label_weights'] = label_weights
			
 
				+            sample['bbox_targets'] = bbox_targets
			
 
				+            sample['pos_num'] = max(pos_inds.size, 1)
			
 
				+            sample.pop('is_crowd', None)
			
 
				+            sample.pop('difficult', None)
			
 
				+            sample.pop('gt_class', None)
			
 
				+            sample.pop('gt_bbox', None)
			
 
				+            sample.pop('gt_score', None)
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2TTFTarget(BaseOperator):
			
 
				+    __shared__ = ['num_classes']
			
 
				+    """
			
 
				+    Gt2TTFTarget
			
 
				+    Generate TTFNet targets by ground truth data
			
 
				+
			
 
				+    Args:
			
 
				+        num_classes(int): the number of classes.
			
 
				+        down_ratio(int): the down ratio from images to heatmap, 4 by default.
			
 
				+        alpha(float): the alpha parameter to generate gaussian target.
			
 
				+            0.54 by default.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
			
 
				+        super(Gt2TTFTarget, self).__init__()
			
 
				+        self.down_ratio = down_ratio
			
 
				+        self.num_classes = num_classes
			
 
				+        self.alpha = alpha
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        output_size = samples[0]['image'].shape[1]
			
 
				+        feat_size = output_size // self.down_ratio
			
 
				+        for sample in samples:
			
 
				+            heatmap = np.zeros(
			
 
				+                (self.num_classes, feat_size, feat_size), dtype='float32')
			
 
				+            box_target = np.ones(
			
 
				+                (4, feat_size, feat_size), dtype='float32') * -1
			
 
				+            reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')
			
 
				+
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            gt_class = sample['gt_class']
			
 
				+
			
 
				+            bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
			
 
				+            bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
			
 
				+            area = bbox_w * bbox_h
			
 
				+            boxes_areas_log = np.log(area)
			
 
				+            boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
			
 
				+            boxes_area_topk_log = boxes_areas_log[boxes_ind]
			
 
				+            gt_bbox = gt_bbox[boxes_ind]
			
 
				+            gt_class = gt_class[boxes_ind]
			
 
				+
			
 
				+            feat_gt_bbox = gt_bbox / self.down_ratio
			
 
				+            feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
			
 
				+            feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
			
 
				+                                feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])
			
 
				+
			
 
				+            ct_inds = np.stack(
			
 
				+                [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
			
 
				+                 (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
			
 
				+                axis=1) / self.down_ratio
			
 
				+
			
 
				+            h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
			
 
				+            w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')
			
 
				+
			
 
				+            for k in range(len(gt_bbox)):
			
 
				+                cls_id = gt_class[k]
			
 
				+                fake_heatmap = np.zeros(
			
 
				+                    (feat_size, feat_size), dtype='float32')
			
 
				+                self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
			
 
				+                                            h_radiuses_alpha[k],
			
 
				+                                            w_radiuses_alpha[k])
			
 
				+
			
 
				+                heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
			
 
				+                box_target_inds = fake_heatmap > 0
			
 
				+                box_target[:, box_target_inds] = gt_bbox[k][:, None]
			
 
				+
			
 
				+                local_heatmap = fake_heatmap[box_target_inds]
			
 
				+                ct_div = np.sum(local_heatmap)
			
 
				+                local_heatmap *= boxes_area_topk_log[k]
			
 
				+                reg_weight[0, box_target_inds] = local_heatmap / ct_div
			
 
				+            sample['ttf_heatmap'] = heatmap
			
 
				+            sample['ttf_box_target'] = box_target
			
 
				+            sample['ttf_reg_weight'] = reg_weight
			
 
				+            sample.pop('is_crowd', None)
			
 
				+            sample.pop('difficult', None)
			
 
				+            sample.pop('gt_class', None)
			
 
				+            sample.pop('gt_bbox', None)
			
 
				+            sample.pop('gt_score', None)
			
 
				+        return samples
			
 
				+
			
 
				+    def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
			
 
				+        h, w = 2 * h_radius + 1, 2 * w_radius + 1
			
 
				+        sigma_x = w / 6
			
 
				+        sigma_y = h / 6
			
 
				+        gaussian = gaussian2D((h, w), sigma_x, sigma_y)
			
 
				+
			
 
				+        x, y = int(center[0]), int(center[1])
			
 
				+
			
 
				+        height, width = heatmap.shape[0:2]
			
 
				+
			
 
				+        left, right = min(x, w_radius), min(width - x, w_radius + 1)
			
 
				+        top, bottom = min(y, h_radius), min(height - y, h_radius + 1)
			
 
				+
			
 
				+        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
			
 
				+        masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
			
 
				+                                   left:w_radius + right]
			
 
				+        if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
			
 
				+            heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
			
 
				+                masked_heatmap, masked_gaussian)
			
 
				+        return heatmap
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2Solov2Target(BaseOperator):
			
 
				+    """Assign mask target and labels in SOLOv2 network.
			
 
				+    The code of this function is based on:
			
 
				+        https://github.com/WXinlong/SOLO/blob/master/mmdet/models/anchor_heads/solov2_head.py#L271
			
 
				+    Args:
			
 
				+        num_grids (list): The list of feature map grids size.
			
 
				+        scale_ranges (list): The list of mask boundary range.
			
 
				+        coord_sigma (float): The coefficient of coordinate area length.
			
 
				+        sampling_ratio (float): The ratio of down sampling.
			
 
				+    """
			
 
				+
			
 
    def __init__(self,
                 num_grids=[40, 36, 24, 16, 12],
                 scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
                               [384, 2048]],
                 coord_sigma=0.2,
                 sampling_ratio=4.0):
        """Store the SOLOv2 target-assignment settings on the operator.

        Args:
            num_grids (list): The list of feature map grids size.
            scale_ranges (list): The list of mask boundary range.
            coord_sigma (float): The coefficient of coordinate area length.
            sampling_ratio (float): The ratio of down sampling.
        """
        super().__init__()
        # The default lists are stored as given, not copied.
        self.sampling_ratio = sampling_ratio
        self.coord_sigma = coord_sigma
        self.scale_ranges = scale_ranges
        self.num_grids = num_grids
			
 
				+
			
 
    def _scale_size(self, im, scale):
        """Return ``im`` rescaled by ``scale`` on both axes.

        Args:
            im: Image array accepted by ``cv2.resize``.
            scale: Factor passed as both ``fx`` and ``fy``.

        Returns:
            The resized image, bilinearly interpolated.
        """
        # The output size is left to cv2 (dsize=None) and derived from
        # fx/fy; the previously computed explicit size was never used.
        return cv2.resize(
            im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        sample_id = 0
			
 
				+        max_ins_num = [0] * len(self.num_grids)
			
 
				+        for sample in samples:
			
 
				+            gt_bboxes_raw = sample['gt_bbox']
			
 
				+            gt_labels_raw = sample['gt_class'] + 1
			
 
				+            im_c, im_h, im_w = sample['image'].shape[:]
			
 
				+            gt_masks_raw = sample['gt_segm'].astype(np.uint8)
			
 
				+            mask_feat_size = [
			
 
				+                int(im_h / self.sampling_ratio),
			
 
				+                int(im_w / self.sampling_ratio)
			
 
				+            ]
			
 
				+            gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
			
 
				+                               (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
			
 
				+            ins_ind_label_list = []
			
 
				+            idx = 0
			
 
				+            for (lower_bound, upper_bound), num_grid \
			
 
				+                    in zip(self.scale_ranges, self.num_grids):
			
 
				+
			
 
				+                hit_indices = ((gt_areas >= lower_bound) &
			
 
				+                               (gt_areas <= upper_bound)).nonzero()[0]
			
 
				+                num_ins = len(hit_indices)
			
 
				+
			
 
				+                ins_label = []
			
 
				+                grid_order = []
			
 
				+                cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
			
 
				+                ins_ind_label = np.zeros([num_grid**2], dtype=np.bool)
			
 
				+
			
 
				+                if num_ins == 0:
			
 
				+                    ins_label = np.zeros(
			
 
				+                        [1, mask_feat_size[0], mask_feat_size[1]],
			
 
				+                        dtype=np.uint8)
			
 
				+                    ins_ind_label_list.append(ins_ind_label)
			
 
				+                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
			
 
				+                    sample['ins_label{}'.format(idx)] = ins_label
			
 
				+                    sample['grid_order{}'.format(idx)] = np.asarray(
			
 
				+                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
			
 
				+                    idx += 1
			
 
				+                    continue
			
 
				+                gt_bboxes = gt_bboxes_raw[hit_indices]
			
 
				+                gt_labels = gt_labels_raw[hit_indices]
			
 
				+                gt_masks = gt_masks_raw[hit_indices, ...]
			
 
				+
			
 
				+                half_ws = 0.5 * (
			
 
				+                    gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
			
 
				+                half_hs = 0.5 * (
			
 
				+                    gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma
			
 
				+
			
 
				+                for seg_mask, gt_label, half_h, half_w in zip(
			
 
				+                        gt_masks, gt_labels, half_hs, half_ws):
			
 
				+                    if seg_mask.sum() == 0:
			
 
				+                        continue
			
 
				+                    # mass center
			
 
				+                    upsampled_size = (mask_feat_size[0] * 4,
			
 
				+                                      mask_feat_size[1] * 4)
			
 
				+                    center_h, center_w = ndimage.measurements.center_of_mass(
			
 
				+                        seg_mask)
			
 
				+                    coord_w = int(
			
 
				+                        (center_w / upsampled_size[1]) // (1. / num_grid))
			
 
				+                    coord_h = int(
			
 
				+                        (center_h / upsampled_size[0]) // (1. / num_grid))
			
 
				+
			
 
				+                    # left, top, right, down
			
 
				+                    top_box = max(0,
			
 
				+                                  int(((center_h - half_h) / upsampled_size[0])
			
 
				+                                      // (1. / num_grid)))
			
 
				+                    down_box = min(
			
 
				+                        num_grid - 1,
			
 
				+                        int(((center_h + half_h) / upsampled_size[0]) //
			
 
				+                            (1. / num_grid)))
			
 
				+                    left_box = max(
			
 
				+                        0,
			
 
				+                        int(((center_w - half_w) / upsampled_size[1]) //
			
 
				+                            (1. / num_grid)))
			
 
				+                    right_box = min(num_grid - 1,
			
 
				+                                    int(((center_w + half_w) /
			
 
				+                                         upsampled_size[1]) //
			
 
				+                                        (1. / num_grid)))
			
 
				+
			
 
				+                    top = max(top_box, coord_h - 1)
			
 
				+                    down = min(down_box, coord_h + 1)
			
 
				+                    left = max(coord_w - 1, left_box)
			
 
				+                    right = min(right_box, coord_w + 1)
			
 
				+
			
 
				+                    cate_label[top:(down + 1), left:(right + 1)] = gt_label
			
 
				+                    seg_mask = self._scale_size(
			
 
				+                        seg_mask, scale=1. / self.sampling_ratio)
			
 
				+                    for i in range(top, down + 1):
			
 
				+                        for j in range(left, right + 1):
			
 
				+                            label = int(i * num_grid + j)
			
 
				+                            cur_ins_label = np.zeros(
			
 
				+                                [mask_feat_size[0], mask_feat_size[1]],
			
 
				+                                dtype=np.uint8)
			
 
				+                            cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
			
 
				+                                1]] = seg_mask
			
 
				+                            ins_label.append(cur_ins_label)
			
 
				+                            ins_ind_label[label] = True
			
 
				+                            grid_order.append(sample_id * num_grid * num_grid +
			
 
				+                                              label)
			
 
				+                if ins_label == []:
			
 
				+                    ins_label = np.zeros(
			
 
				+                        [1, mask_feat_size[0], mask_feat_size[1]],
			
 
				+                        dtype=np.uint8)
			
 
				+                    ins_ind_label_list.append(ins_ind_label)
			
 
				+                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
			
 
				+                    sample['ins_label{}'.format(idx)] = ins_label
			
 
				+                    sample['grid_order{}'.format(idx)] = np.asarray(
			
 
				+                        [sample_id * num_grid * num_grid + 0], dtype=np.int32)
			
 
				+                else:
			
 
				+                    ins_label = np.stack(ins_label, axis=0)
			
 
				+                    ins_ind_label_list.append(ins_ind_label)
			
 
				+                    sample['cate_label{}'.format(idx)] = cate_label.flatten()
			
 
				+                    sample['ins_label{}'.format(idx)] = ins_label
			
 
				+                    sample['grid_order{}'.format(idx)] = np.asarray(
			
 
				+                        grid_order, dtype=np.int32)
			
 
				+                    assert len(grid_order) > 0
			
 
				+                max_ins_num[idx] = max(
			
 
				+                    max_ins_num[idx],
			
 
				+                    sample['ins_label{}'.format(idx)].shape[0])
			
 
				+                idx += 1
			
 
				+            ins_ind_labels = np.concatenate([
			
 
				+                ins_ind_labels_level_img
			
 
				+                for ins_ind_labels_level_img in ins_ind_label_list
			
 
				+            ])
			
 
				+            fg_num = np.sum(ins_ind_labels)
			
 
				+            sample['fg_num'] = fg_num
			
 
				+            sample_id += 1
			
 
				+
			
 
				+            sample.pop('is_crowd')
			
 
				+            sample.pop('gt_class')
			
 
				+            sample.pop('gt_bbox')
			
 
				+            sample.pop('gt_poly')
			
 
				+            sample.pop('gt_segm')
			
 
				+
			
 
				+        # padding batch
			
 
				+        for data in samples:
			
 
				+            for idx in range(len(self.num_grids)):
			
 
				+                gt_ins_data = np.zeros(
			
 
				+                    [
			
 
				+                        max_ins_num[idx],
			
 
				+                        data['ins_label{}'.format(idx)].shape[1],
			
 
				+                        data['ins_label{}'.format(idx)].shape[2]
			
 
				+                    ],
			
 
				+                    dtype=np.uint8)
			
 
				+                gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
			
 
				+                    0], :, :] = data['ins_label{}'.format(idx)]
			
 
				+                gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
			
 
				+                gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
			
 
				+                    0]] = data['grid_order{}'.format(idx)]
			
 
				+                data['ins_label{}'.format(idx)] = gt_ins_data
			
 
				+                data['grid_order{}'.format(idx)] = gt_grid_order
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2SparseRCNNTarget(BaseOperator):
			
 
				+    '''
			
 
				+    Generate SparseRCNN targets by groud truth data
			
 
				+    '''
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(Gt2SparseRCNNTarget, self).__init__()
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        for sample in samples:
			
 
				+            im = sample["image"]
			
 
				+            h, w = im.shape[1:3]
			
 
				+            img_whwh = np.array([w, h, w, h], dtype=np.int32)
			
 
				+            sample["img_whwh"] = img_whwh
			
 
				+            if "scale_factor" in sample:
			
 
				+                sample["scale_factor_wh"] = np.array(
			
 
				+                    [sample["scale_factor"][1], sample["scale_factor"][0]],
			
 
				+                    dtype=np.float32)
			
 
				+            else:
			
 
				+                sample["scale_factor_wh"] = np.array(
			
 
				+                    [1.0, 1.0], dtype=np.float32)
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadMaskBatch(BaseOperator):
			
 
				+    """
			
 
				+    Pad a batch of samples so they can be divisible by a stride.
			
 
				+    The layout of each image should be 'CHW'.
			
 
				+    Args:
			
 
				+        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
			
 
				+            height and width is divisible by `pad_to_stride`.
			
 
				+        return_pad_mask (bool): If `return_pad_mask = True`, return
			
 
				+            `pad_mask` for transformer.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, pad_to_stride=0, return_pad_mask=False):
			
 
				+        super(PadMaskBatch, self).__init__()
			
 
				+        self.pad_to_stride = pad_to_stride
			
 
				+        self.return_pad_mask = return_pad_mask
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        """
			
 
				+        Args:
			
 
				+            samples (list): a batch of sample, each is dict.
			
 
				+        """
			
 
				+        coarsest_stride = self.pad_to_stride
			
 
				+
			
 
				+        max_shape = np.array([data['image'].shape for data in samples]).max(
			
 
				+            axis=0)
			
 
				+        if coarsest_stride > 0:
			
 
				+            max_shape[1] = int(
			
 
				+                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
			
 
				+            max_shape[2] = int(
			
 
				+                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
			
 
				+
			
 
				+        for data in samples:
			
 
				+            im = data['image']
			
 
				+            im_c, im_h, im_w = im.shape[:]
			
 
				+            padding_im = np.zeros(
			
 
				+                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
			
 
				+            padding_im[:, :im_h, :im_w] = im
			
 
				+            data['image'] = padding_im
			
 
				+            if 'semantic' in data and data['semantic'] is not None:
			
 
				+                semantic = data['semantic']
			
 
				+                padding_sem = np.zeros(
			
 
				+                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
			
 
				+                padding_sem[:, :im_h, :im_w] = semantic
			
 
				+                data['semantic'] = padding_sem
			
 
				+            if 'gt_segm' in data and data['gt_segm'] is not None:
			
 
				+                gt_segm = data['gt_segm']
			
 
				+                padding_segm = np.zeros(
			
 
				+                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
			
 
				+                    dtype=np.uint8)
			
 
				+                padding_segm[:, :im_h, :im_w] = gt_segm
			
 
				+                data['gt_segm'] = padding_segm
			
 
				+            if self.return_pad_mask:
			
 
				+                padding_mask = np.zeros(
			
 
				+                    (max_shape[1], max_shape[2]), dtype=np.float32)
			
 
				+                padding_mask[:im_h, :im_w] = 1.
			
 
				+                data['pad_mask'] = padding_mask
			
 
				+
			
 
				+            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
			
 
				+                # ploy to rbox
			
 
				+                polys = data['gt_rbox2poly']
			
 
				+                rbox = bbox_utils.poly2rbox(polys)
			
 
				+                data['gt_rbox'] = rbox
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2CenterNetTarget(BaseOperator):
			
 
				+    """Gt2CenterNetTarget
			
 
				+    Genterate CenterNet targets by ground-truth
			
 
				+    Args:
			
 
				+        down_ratio (int): The down sample ratio between output feature and
			
 
				+                          input image.
			
 
				+        num_classes (int): The number of classes, 80 by default.
			
 
				+        max_objs (int): The maximum objects detected, 128 by default.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, down_ratio, num_classes=80, max_objs=128):
			
 
				+        super(Gt2CenterNetTarget, self).__init__()
			
 
				+        self.down_ratio = down_ratio
			
 
				+        self.num_classes = num_classes
			
 
				+        self.max_objs = max_objs
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        input_h, input_w = sample['image'].shape[1:]
			
 
				+        output_h = input_h // self.down_ratio
			
 
				+        output_w = input_w // self.down_ratio
			
 
				+        num_classes = self.num_classes
			
 
				+        c = sample['center']
			
 
				+        s = sample['scale']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        gt_class = sample['gt_class']
			
 
				+
			
 
				+        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
			
 
				+        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
			
 
				+        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
			
 
				+        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
			
 
				+        ind = np.zeros((self.max_objs), dtype=np.int64)
			
 
				+        reg_mask = np.zeros((self.max_objs), dtype=np.int32)
			
 
				+        cat_spec_wh = np.zeros(
			
 
				+            (self.max_objs, num_classes * 2), dtype=np.float32)
			
 
				+        cat_spec_mask = np.zeros(
			
 
				+            (self.max_objs, num_classes * 2), dtype=np.int32)
			
 
				+
			
 
				+        trans_output = get_affine_transform(c, [s, s], 0, [output_w, output_h])
			
 
				+
			
 
				+        gt_det = []
			
 
				+        for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
			
 
				+            cls = int(cls)
			
 
				+            bbox[:2] = affine_transform(bbox[:2], trans_output)
			
 
				+            bbox[2:] = affine_transform(bbox[2:], trans_output)
			
 
				+            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
			
 
				+            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
			
 
				+            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
			
 
				+            if h > 0 and w > 0:
			
 
				+                radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
			
 
				+                radius = max(0, int(radius))
			
 
				+                ct = np.array(
			
 
				+                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
			
 
				+                    dtype=np.float32)
			
 
				+                ct_int = ct.astype(np.int32)
			
 
				+                draw_umich_gaussian(hm[cls], ct_int, radius)
			
 
				+                wh[i] = 1. * w, 1. * h
			
 
				+                ind[i] = ct_int[1] * output_w + ct_int[0]
			
 
				+                reg[i] = ct - ct_int
			
 
				+                reg_mask[i] = 1
			
 
				+                cat_spec_wh[i, cls * 2:cls * 2 + 2] = wh[i]
			
 
				+                cat_spec_mask[i, cls * 2:cls * 2 + 2] = 1
			
 
				+                gt_det.append([
			
 
				+                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
			
 
				+                    1, cls
			
 
				+                ])
			
 
				+
			
 
				+        sample.pop('gt_bbox', None)
			
 
				+        sample.pop('gt_class', None)
			
 
				+        sample.pop('center', None)
			
 
				+        sample.pop('scale', None)
			
 
				+        sample.pop('is_crowd', None)
			
 
				+        sample.pop('difficult', None)
			
 
				+        sample['heatmap'] = hm
			
 
				+        sample['index_mask'] = reg_mask
			
 
				+        sample['index'] = ind
			
 
				+        sample['size'] = wh
			
 
				+        sample['offset'] = reg
			
 
				+        return sample
			
--- a/paddlers/models/ppdet/data/transform/gridmask_utils.py
+++ b/paddlers/models/ppdet/data/transform/gridmask_utils.py
@@ -0,0 +1,86 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+# The code is based on:
			
 
				+# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import print_function
			
 
				+from __future__ import division
			
 
				+
			
 
				+import numpy as np
			
 
				+from PIL import Image
			
 
				+
			
 
				+
			
 
				+class Gridmask(object):
			
 
				+    def __init__(self,
			
 
				+                 use_h=True,
			
 
				+                 use_w=True,
			
 
				+                 rotate=1,
			
 
				+                 offset=False,
			
 
				+                 ratio=0.5,
			
 
				+                 mode=1,
			
 
				+                 prob=0.7,
			
 
				+                 upper_iter=360000):
			
 
				+        super(Gridmask, self).__init__()
			
 
				+        self.use_h = use_h
			
 
				+        self.use_w = use_w
			
 
				+        self.rotate = rotate
			
 
				+        self.offset = offset
			
 
				+        self.ratio = ratio
			
 
				+        self.mode = mode
			
 
				+        self.prob = prob
			
 
				+        self.st_prob = prob
			
 
				+        self.upper_iter = upper_iter
			
 
				+
			
 
				+    def __call__(self, x, curr_iter):
			
 
				+        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
			
 
				+        if np.random.rand() > self.prob:
			
 
				+            return x
			
 
				+        h, w, _ = x.shape
			
 
				+        hh = int(1.5 * h)
			
 
				+        ww = int(1.5 * w)
			
 
				+        d = np.random.randint(2, h)
			
 
				+        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
			
 
				+        mask = np.ones((hh, ww), np.float32)
			
 
				+        st_h = np.random.randint(d)
			
 
				+        st_w = np.random.randint(d)
			
 
				+        if self.use_h:
			
 
				+            for i in range(hh // d):
			
 
				+                s = d * i + st_h
			
 
				+                t = min(s + self.l, hh)
			
 
				+                mask[s:t, :] *= 0
			
 
				+        if self.use_w:
			
 
				+            for i in range(ww // d):
			
 
				+                s = d * i + st_w
			
 
				+                t = min(s + self.l, ww)
			
 
				+                mask[:, s:t] *= 0
			
 
				+
			
 
				+        r = np.random.randint(self.rotate)
			
 
				+        mask = Image.fromarray(np.uint8(mask))
			
 
				+        mask = mask.rotate(r)
			
 
				+        mask = np.asarray(mask)
			
 
				+        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) //
			
 
				+                    2 + w].astype(np.float32)
			
 
				+
			
 
				+        if self.mode == 1:
			
 
				+            mask = 1 - mask
			
 
				+        mask = np.expand_dims(mask, axis=-1)
			
 
				+        if self.offset:
			
 
				+            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
			
 
				+            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
			
 
				+        else:
			
 
				+            x = (x * mask).astype(x.dtype)
			
 
				+
			
 
				+        return x
			
--- a/paddlers/models/ppdet/data/transform/keypoint_operators.py
+++ b/paddlers/models/ppdet/data/transform/keypoint_operators.py
@@ -0,0 +1,868 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+# function:
			
 
				+#    operators to process sample,
			
 
				+#    eg: decode/resize/crop image
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import math
			
 
				+import copy
			
 
				+
			
 
				+from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
			
 
				+from paddlers.models.ppdet.core.workspace import serializable
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+registered_ops = []
			
 
				+
			
 
				+__all__ = [
			
 
				+    'RandomAffine',
			
 
				+    'KeyPointFlip',
			
 
				+    'TagGenerate',
			
 
				+    'ToHeatmaps',
			
 
				+    'NormalizePermute',
			
 
				+    'EvalAffine',
			
 
				+    'RandomFlipHalfBodyTransform',
			
 
				+    'TopDownAffine',
			
 
				+    'ToHeatmapsTopDown',
			
 
				+    'ToHeatmapsTopDown_DARK',
			
 
				+    'ToHeatmapsTopDown_UDP',
			
 
				+    'TopDownEvalAffine',
			
 
				+    'AugmentationbyInformantionDropping',
			
 
				+]
			
 
				+
			
 
				+
			
 
				+def register_keypointop(cls):
			
 
				+    return serializable(cls)
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class KeyPointFlip(object):
			
 
				+    """Get the fliped image by flip_prob. flip the coords also
			
 
				+    the left coords and right coords should exchange while flip, for the right keypoint will be left keypoint after image fliped
			
 
				+
			
 
				+    Args:
			
 
				+        flip_permutation (list[17]): the left-right exchange order list corresponding to [0,1,2,...,16]
			
 
				+        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
			
 
				+        flip_prob (float): the ratio whether to flip the image
			
 
				+        records(dict): the dict contained the image, mask and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the image, mask and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
			
 
				+        super(KeyPointFlip, self).__init__()
			
 
				+        assert isinstance(flip_permutation, Sequence)
			
 
				+        self.flip_permutation = flip_permutation
			
 
				+        self.flip_prob = flip_prob
			
 
				+        self.hmsize = hmsize
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        kpts_lst = records['joints']
			
 
				+        mask_lst = records['mask']
			
 
				+        flip = np.random.random() < self.flip_prob
			
 
				+        if flip:
			
 
				+            image = image[:, ::-1]
			
 
				+            for idx, hmsize in enumerate(self.hmsize):
			
 
				+                if len(mask_lst) > idx:
			
 
				+                    mask_lst[idx] = mask_lst[idx][:, ::-1]
			
 
				+                if kpts_lst[idx].ndim == 3:
			
 
				+                    kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
			
 
				+                else:
			
 
				+                    kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
			
 
				+                kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
			
 
				+                kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
			
 
				+                kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
			
 
				+                kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
			
 
				+                kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
			
 
				+                kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
			
 
				+        records['image'] = image
			
 
				+        records['joints'] = kpts_lst
			
 
				+        records['mask'] = mask_lst
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class RandomAffine(object):
			
 
				+    """apply affine transform to image, mask and coords
			
 
				+    to achieve the rotate, scale and shift effect for training image
			
 
				+
			
 
				+    Args:
			
 
				+        max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
			
 
				+        max_scale (list[2]): the scale range to apply, transform range is [min, max]
			
 
				+        max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
			
 
				+        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
			
 
				+        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
			
 
				+        scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
			
 
				+        records(dict): the dict contained the image, mask and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the image, mask and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 max_degree=30,
			
 
				+                 scale=[0.75, 1.5],
			
 
				+                 max_shift=0.2,
			
 
				+                 hmsize=[128, 256],
			
 
				+                 trainsize=512,
			
 
				+                 scale_type='short'):
			
 
				+        super(RandomAffine, self).__init__()
			
 
				+        self.max_degree = max_degree
			
 
				+        self.min_scale = scale[0]
			
 
				+        self.max_scale = scale[1]
			
 
				+        self.max_shift = max_shift
			
 
				+        self.hmsize = hmsize
			
 
				+        self.trainsize = trainsize
			
 
				+        self.scale_type = scale_type
			
 
				+
			
 
				+    def _get_affine_matrix(self, center, scale, res, rot=0):
			
 
				+        """Generate transformation matrix."""
			
 
				+        h = scale
			
 
				+        t = np.zeros((3, 3), dtype=np.float32)
			
 
				+        t[0, 0] = float(res[1]) / h
			
 
				+        t[1, 1] = float(res[0]) / h
			
 
				+        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
			
 
				+        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
			
 
				+        t[2, 2] = 1
			
 
				+        if rot != 0:
			
 
				+            rot = -rot  # To match direction of rotation from cropping
			
 
				+            rot_mat = np.zeros((3, 3), dtype=np.float32)
			
 
				+            rot_rad = rot * np.pi / 180
			
 
				+            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
			
 
				+            rot_mat[0, :2] = [cs, -sn]
			
 
				+            rot_mat[1, :2] = [sn, cs]
			
 
				+            rot_mat[2, 2] = 1
			
 
				+            # Need to rotate around center
			
 
				+            t_mat = np.eye(3)
			
 
				+            t_mat[0, 2] = -res[1] / 2
			
 
				+            t_mat[1, 2] = -res[0] / 2
			
 
				+            t_inv = t_mat.copy()
			
 
				+            t_inv[:2, 2] *= -1
			
 
				+            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
			
 
				+        return t
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        keypoints = records['joints']
			
 
				+        heatmap_mask = records['mask']
			
 
				+
			
 
				+        degree = (np.random.random() * 2 - 1) * self.max_degree
			
 
				+        shape = np.array(image.shape[:2][::-1])
			
 
				+        center = center = np.array((np.array(shape) / 2))
			
 
				+
			
 
				+        aug_scale = np.random.random() * (self.max_scale - self.min_scale
			
 
				+                                          ) + self.min_scale
			
 
				+        if self.scale_type == 'long':
			
 
				+            scale = max(shape[0], shape[1]) / 1.0
			
 
				+        elif self.scale_type == 'short':
			
 
				+            scale = min(shape[0], shape[1]) / 1.0
			
 
				+        else:
			
 
				+            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
			
 
				+        roi_size = aug_scale * scale
			
 
				+        dx = int(0)
			
 
				+        dy = int(0)
			
 
				+        if self.max_shift > 0:
			
 
				+
			
 
				+            dx = np.random.randint(-self.max_shift * roi_size,
			
 
				+                                   self.max_shift * roi_size)
			
 
				+            dy = np.random.randint(-self.max_shift * roi_size,
			
 
				+                                   self.max_shift * roi_size)
			
 
				+
			
 
				+        center += np.array([dx, dy])
			
 
				+        input_size = 2 * center
			
 
				+
			
 
				+        keypoints[..., :2] *= shape
			
 
				+        heatmap_mask *= 255
			
 
				+        kpts_lst = []
			
 
				+        mask_lst = []
			
 
				+
			
 
				+        image_affine_mat = self._get_affine_matrix(
			
 
				+            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
			
 
				+        image = cv2.warpAffine(
			
 
				+            image,
			
 
				+            image_affine_mat, (self.trainsize, self.trainsize),
			
 
				+            flags=cv2.INTER_LINEAR)
			
 
				+        for hmsize in self.hmsize:
			
 
				+            kpts = copy.deepcopy(keypoints)
			
 
				+            mask_affine_mat = self._get_affine_matrix(
			
 
				+                center, roi_size, (hmsize, hmsize), degree)[:2]
			
 
				+            if heatmap_mask is not None:
			
 
				+                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
			
 
				+                                      (hmsize, hmsize))
			
 
				+                mask = ((mask / 255) > 0.5).astype(np.float32)
			
 
				+            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
			
 
				+                                                mask_affine_mat)
			
 
				+            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
			
 
				+            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
			
 
				+            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
			
 
				+            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
			
 
				+            kpts_lst.append(kpts)
			
 
				+            mask_lst.append(mask)
			
 
				+        records['image'] = image
			
 
				+        records['joints'] = kpts_lst
			
 
				+        records['mask'] = mask_lst
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class EvalAffine(object):
			
 
				+    """apply affine transform to image
			
 
				+    resize the short of [h,w] to standard size for eval
			
 
				+
			
 
				+    Args:
			
 
				+        size (int): the standard length used to train, the 'short' of [h,w] will be resize to trainsize for standard
			
 
				+        records(dict): the dict contained the image, mask and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the image, mask and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, size, stride=64):
			
 
				+        super(EvalAffine, self).__init__()
			
 
				+        self.size = size
			
 
				+        self.stride = stride
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        mask = records['mask'] if 'mask' in records else None
			
 
				+        s = self.size
			
 
				+        h, w, _ = image.shape
			
 
				+        trans, size_resized = get_affine_mat_kernel(h, w, s, inv=False)
			
 
				+        image_resized = cv2.warpAffine(image, trans, size_resized)
			
 
				+        if mask is not None:
			
 
				+            mask = cv2.warpAffine(mask, trans, size_resized)
			
 
				+            records['mask'] = mask
			
 
				+        if 'joints' in records:
			
 
				+            del records['joints']
			
 
				+        records['image'] = image_resized
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class NormalizePermute(object):
			
 
				+    def __init__(self,
			
 
				+                 mean=[123.675, 116.28, 103.53],
			
 
				+                 std=[58.395, 57.120, 57.375],
			
 
				+                 is_scale=True):
			
 
				+        super(NormalizePermute, self).__init__()
			
 
				+        self.mean = mean
			
 
				+        self.std = std
			
 
				+        self.is_scale = is_scale
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        image = image.astype(np.float32)
			
 
				+        if self.is_scale:
			
 
				+            image /= 255.
			
 
				+        image = image.transpose((2, 0, 1))
			
 
				+        mean = np.array(self.mean, dtype=np.float32)
			
 
				+        std = np.array(self.std, dtype=np.float32)
			
 
				+        invstd = 1. / std
			
 
				+        for v, m, s in zip(image, mean, invstd):
			
 
				+            v.__isub__(m).__imul__(s)
			
 
				+        records['image'] = image
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class TagGenerate(object):
			
 
				+    """record gt coords for aeloss to sample coords value in tagmaps
			
 
				+
			
 
				+    Args:
			
 
				+        num_joints (int): the keypoint numbers of dataset to train
			
 
				+        num_people (int): maxmum people to support for sample aeloss
			
 
				+        records(dict): the dict contained the image, mask and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the gt coords used in tagmap
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, num_joints, max_people=30):
			
 
				+        super(TagGenerate, self).__init__()
			
 
				+        self.max_people = max_people
			
 
				+        self.num_joints = num_joints
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        kpts_lst = records['joints']
			
 
				+        kpts = kpts_lst[0]
			
 
				+        tagmap = np.zeros(
			
 
				+            (self.max_people, self.num_joints, 4), dtype=np.int64)
			
 
				+        inds = np.where(kpts[..., 2] > 0)
			
 
				+        p, j = inds[0], inds[1]
			
 
				+        visible = kpts[inds]
			
 
				+        # tagmap is [p, j, 3], where last dim is j, y, x
			
 
				+        tagmap[p, j, 0] = j
			
 
				+        tagmap[p, j, 1] = visible[..., 1]  # y
			
 
				+        tagmap[p, j, 2] = visible[..., 0]  # x
			
 
				+        tagmap[p, j, 3] = 1
			
 
				+        records['tagmap'] = tagmap
			
 
				+        del records['joints']
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class ToHeatmaps(object):
			
 
				+    """to generate the gaussin heatmaps of keypoint for heatmap loss
			
 
				+
			
 
				+    Args:
			
 
				+        num_joints (int): the keypoint numbers of dataset to train
			
 
				+        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
			
 
				+        sigma (float): the std of gaussin kernel genereted
			
 
				+        records(dict): the dict contained the image, mask and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the heatmaps used to heatmaploss
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, num_joints, hmsize, sigma=None):
			
 
				+        super(ToHeatmaps, self).__init__()
			
 
				+        self.num_joints = num_joints
			
 
				+        self.hmsize = np.array(hmsize)
			
 
				+        if sigma is None:
			
 
				+            sigma = hmsize[0] // 64
			
 
				+        self.sigma = sigma
			
 
				+
			
 
				+        r = 6 * sigma + 3
			
 
				+        x = np.arange(0, r, 1, np.float32)
			
 
				+        y = x[:, None]
			
 
				+        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
			
 
				+        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        kpts_lst = records['joints']
			
 
				+        mask_lst = records['mask']
			
 
				+        for idx, hmsize in enumerate(self.hmsize):
			
 
				+            mask = mask_lst[idx]
			
 
				+            kpts = kpts_lst[idx]
			
 
				+            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
			
 
				+            inds = np.where(kpts[..., 2] > 0)
			
 
				+            visible = kpts[inds].astype(np.int64)[..., :2]
			
 
				+            ul = np.round(visible - 3 * self.sigma - 1)
			
 
				+            br = np.round(visible + 3 * self.sigma + 2)
			
 
				+            sul = np.maximum(0, -ul)
			
 
				+            sbr = np.minimum(hmsize, br) - ul
			
 
				+            dul = np.clip(ul, 0, hmsize - 1)
			
 
				+            dbr = np.clip(br, 0, hmsize)
			
 
				+            for i in range(len(visible)):
			
 
				+                if visible[i][0] < 0 or visible[i][1] < 0 or visible[i][
			
 
				+                        0] >= hmsize or visible[i][1] >= hmsize:
			
 
				+                    continue
			
 
				+                dx1, dy1 = dul[i]
			
 
				+                dx2, dy2 = dbr[i]
			
 
				+                sx1, sy1 = sul[i]
			
 
				+                sx2, sy2 = sbr[i]
			
 
				+                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
			
 
				+                    self.gaussian[sy1:sy2, sx1:sx2],
			
 
				+                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
			
 
				+            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
			
 
				+            records['mask_{}x'.format(idx + 1)] = mask
			
 
				+        del records['mask']
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class RandomFlipHalfBodyTransform(object):
			
 
				+    """apply data augment to image and coords
			
 
				+    to achieve the flip, scale, rotate and half body transform effect for training image
			
 
				+
			
 
				+    Args:
			
 
				+        trainsize (list):[w, h], Image target size
			
 
				+        upper_body_ids (list): The upper body joint ids
			
 
				+        flip_pairs (list): The left-right joints exchange order list
			
 
				+        pixel_std (int): The pixel std of the scale
			
 
				+        scale (float): The scale factor to transform the image
			
 
				+        rot (int): The rotate factor to transform the image
			
 
				+        num_joints_half_body (int): The joints threshold of the half body transform
			
 
				+        prob_half_body (float): The threshold of the half body transform
			
 
				+        flip (bool): Whether to flip the image
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the image and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 trainsize,
			
 
				+                 upper_body_ids,
			
 
				+                 flip_pairs,
			
 
				+                 pixel_std,
			
 
				+                 scale=0.35,
			
 
				+                 rot=40,
			
 
				+                 num_joints_half_body=8,
			
 
				+                 prob_half_body=0.3,
			
 
				+                 flip=True,
			
 
				+                 rot_prob=0.6):
			
 
				+        super(RandomFlipHalfBodyTransform, self).__init__()
			
 
				+        self.trainsize = trainsize
			
 
				+        self.upper_body_ids = upper_body_ids
			
 
				+        self.flip_pairs = flip_pairs
			
 
				+        self.pixel_std = pixel_std
			
 
				+        self.scale = scale
			
 
				+        self.rot = rot
			
 
				+        self.num_joints_half_body = num_joints_half_body
			
 
				+        self.prob_half_body = prob_half_body
			
 
				+        self.flip = flip
			
 
				+        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
			
 
				+        self.rot_prob = rot_prob
			
 
				+
			
 
				+    def halfbody_transform(self, joints, joints_vis):
			
 
				+        upper_joints = []
			
 
				+        lower_joints = []
			
 
				+        for joint_id in range(joints.shape[0]):
			
 
				+            if joints_vis[joint_id][0] > 0:
			
 
				+                if joint_id in self.upper_body_ids:
			
 
				+                    upper_joints.append(joints[joint_id])
			
 
				+                else:
			
 
				+                    lower_joints.append(joints[joint_id])
			
 
				+        if np.random.randn() < 0.5 and len(upper_joints) > 2:
			
 
				+            selected_joints = upper_joints
			
 
				+        else:
			
 
				+            selected_joints = lower_joints if len(
			
 
				+                lower_joints) > 2 else upper_joints
			
 
				+        if len(selected_joints) < 2:
			
 
				+            return None, None
			
 
				+        selected_joints = np.array(selected_joints, dtype=np.float32)
			
 
				+        center = selected_joints.mean(axis=0)[:2]
			
 
				+        left_top = np.amin(selected_joints, axis=0)
			
 
				+        right_bottom = np.amax(selected_joints, axis=0)
			
 
				+        w = right_bottom[0] - left_top[0]
			
 
				+        h = right_bottom[1] - left_top[1]
			
 
				+        if w > self.aspect_ratio * h:
			
 
				+            h = w * 1.0 / self.aspect_ratio
			
 
				+        elif w < self.aspect_ratio * h:
			
 
				+            w = h * self.aspect_ratio
			
 
				+        scale = np.array(
			
 
				+            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
			
 
				+            dtype=np.float32)
			
 
				+        scale = scale * 1.5
			
 
				+
			
 
				+        return center, scale
			
 
				+
			
 
				+    def flip_joints(self, joints, joints_vis, width, matched_parts):
			
 
				+        joints[:, 0] = width - joints[:, 0] - 1
			
 
				+        for pair in matched_parts:
			
 
				+            joints[pair[0], :], joints[pair[1], :] = \
			
 
				+                joints[pair[1], :], joints[pair[0], :].copy()
			
 
				+            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
			
 
				+                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
			
 
				+
			
 
				+        return joints * joints_vis, joints_vis
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        c = records['center']
			
 
				+        s = records['scale']
			
 
				+        r = 0
			
 
				+        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
			
 
				+                np.random.rand() < self.prob_half_body):
			
 
				+            c_half_body, s_half_body = self.halfbody_transform(joints,
			
 
				+                                                               joints_vis)
			
 
				+            if c_half_body is not None and s_half_body is not None:
			
 
				+                c, s = c_half_body, s_half_body
			
 
				+        sf = self.scale
			
 
				+        rf = self.rot
			
 
				+        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
			
 
				+        r = np.clip(np.random.randn() * rf, -rf * 2,
			
 
				+                    rf * 2) if np.random.random() <= self.rot_prob else 0
			
 
				+
			
 
				+        if self.flip and np.random.random() <= 0.5:
			
 
				+            image = image[:, ::-1, :]
			
 
				+            joints, joints_vis = self.flip_joints(
			
 
				+                joints, joints_vis, image.shape[1], self.flip_pairs)
			
 
				+            c[0] = image.shape[1] - c[0] - 1
			
 
				+        records['image'] = image
			
 
				+        records['joints'] = joints
			
 
				+        records['joints_vis'] = joints_vis
			
 
				+        records['center'] = c
			
 
				+        records['scale'] = s
			
 
				+        records['rotate'] = r
			
 
				+
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class AugmentationbyInformantionDropping(object):
			
 
				+    """AID: Augmentation by Informantion Dropping. Please refer
			
 
				+        to https://arxiv.org/abs/2008.07139
			
 
				+
			
 
				+    Args:
			
 
				+        prob_cutout (float): The probability of the Cutout augmentation.
			
 
				+        offset_factor (float): Offset factor of cutout center.
			
 
				+        num_patch (int): Number of patches to be cutout.
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the image and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 trainsize,
			
 
				+                 prob_cutout=0.0,
			
 
				+                 offset_factor=0.2,
			
 
				+                 num_patch=1):
			
 
				+        self.prob_cutout = prob_cutout
			
 
				+        self.offset_factor = offset_factor
			
 
				+        self.num_patch = num_patch
			
 
				+        self.trainsize = trainsize
			
 
				+
			
 
				+    def _cutout(self, img, joints, joints_vis):
			
 
				+        height, width, _ = img.shape
			
 
				+        img = img.reshape((height * width, -1))
			
 
				+        feat_x_int = np.arange(0, width)
			
 
				+        feat_y_int = np.arange(0, height)
			
 
				+        feat_x_int, feat_y_int = np.meshgrid(feat_x_int, feat_y_int)
			
 
				+        feat_x_int = feat_x_int.reshape((-1, ))
			
 
				+        feat_y_int = feat_y_int.reshape((-1, ))
			
 
				+        for _ in range(self.num_patch):
			
 
				+            vis_idx, _ = np.where(joints_vis > 0)
			
 
				+            occlusion_joint_id = np.random.choice(vis_idx)
			
 
				+            center = joints[occlusion_joint_id, 0:2]
			
 
				+            offset = np.random.randn(2) * self.trainsize[
			
 
				+                0] * self.offset_factor
			
 
				+            center = center + offset
			
 
				+            radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
			
 
				+            x_offset = (center[0] - feat_x_int) / radius
			
 
				+            y_offset = (center[1] - feat_y_int) / radius
			
 
				+            dis = x_offset**2 + y_offset**2
			
 
				+            keep_pos = np.where((dis <= 1) & (dis >= 0))[0]
			
 
				+            img[keep_pos, :] = 0
			
 
				+        img = img.reshape((height, width, -1))
			
 
				+        return img
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        img = records['image']
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        if np.random.rand() < self.prob_cutout:
			
 
				+            img = self._cutout(img, joints, joints_vis)
			
 
				+        records['image'] = img
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class TopDownAffine(object):
			
 
				+    """apply affine transform to image and coords
			
 
				+
			
 
				+    Args:
			
 
				+        trainsize (list): [w, h], the standard size used to train
			
 
				+        use_udp (bool): whether to use Unbiased Data Processing.
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the image and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, trainsize, use_udp=False):
			
 
				+        self.trainsize = trainsize
			
 
				+        self.use_udp = use_udp
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        rot = records['rotate'] if "rotate" in records else 0
			
 
				+        if self.use_udp:
			
 
				+            trans = get_warp_matrix(
			
 
				+                rot, records['center'] * 2.0,
			
 
				+                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
			
 
				+                records['scale'] * 200.0)
			
 
				+            image = cv2.warpAffine(
			
 
				+                image,
			
 
				+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
			
 
				+                flags=cv2.INTER_LINEAR)
			
 
				+            joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
			
 
				+        else:
			
 
				+            trans = get_affine_transform(records['center'], records['scale'] *
			
 
				+                                         200, rot, self.trainsize)
			
 
				+            image = cv2.warpAffine(
			
 
				+                image,
			
 
				+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
			
 
				+                flags=cv2.INTER_LINEAR)
			
 
				+            for i in range(joints.shape[0]):
			
 
				+                if joints_vis[i, 0] > 0.0:
			
 
				+                    joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
			
 
				+
			
 
				+        records['image'] = image
			
 
				+        records['joints'] = joints
			
 
				+
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class TopDownEvalAffine(object):
			
 
				+    """apply affine transform to image and coords
			
 
				+
			
 
				+    Args:
			
 
				+        trainsize (list): [w, h], the standard size used to train
			
 
				+        use_udp (bool): whether to use Unbiased Data Processing.
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the image and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, trainsize, use_udp=False):
			
 
				+        self.trainsize = trainsize
			
 
				+        self.use_udp = use_udp
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        image = records['image']
			
 
				+        rot = 0
			
 
				+        imshape = records['im_shape'][::-1]
			
 
				+        center = imshape / 2.
			
 
				+        scale = imshape
			
 
				+
			
 
				+        if self.use_udp:
			
 
				+            trans = get_warp_matrix(
			
 
				+                rot, center * 2.0,
			
 
				+                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
			
 
				+            image = cv2.warpAffine(
			
 
				+                image,
			
 
				+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
			
 
				+                flags=cv2.INTER_LINEAR)
			
 
				+        else:
			
 
				+            trans = get_affine_transform(center, scale, rot, self.trainsize)
			
 
				+            image = cv2.warpAffine(
			
 
				+                image,
			
 
				+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
			
 
				+                flags=cv2.INTER_LINEAR)
			
 
				+        records['image'] = image
			
 
				+
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class ToHeatmapsTopDown(object):
			
 
				+    """to generate the gaussin heatmaps of keypoint for heatmap loss
			
 
				+
			
 
				+    Args:
			
 
				+        hmsize (list): [w, h] output heatmap's size
			
 
				+        sigma (float): the std of gaussin kernel genereted
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the heatmaps used to heatmaploss
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, hmsize, sigma):
			
 
				+        super(ToHeatmapsTopDown, self).__init__()
			
 
				+        self.hmsize = np.array(hmsize)
			
 
				+        self.sigma = sigma
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        """refer to
			
 
				+            https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
			
 
				+            Copyright (c) Microsoft, under the MIT License.
			
 
				+        """
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        num_joints = joints.shape[0]
			
 
				+        image_size = np.array(
			
 
				+            [records['image'].shape[1], records['image'].shape[0]])
			
 
				+        target_weight = np.ones((num_joints, 1), dtype=np.float32)
			
 
				+        target_weight[:, 0] = joints_vis[:, 0]
			
 
				+        target = np.zeros(
			
 
				+            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
			
 
				+        tmp_size = self.sigma * 3
			
 
				+        feat_stride = image_size / self.hmsize
			
 
				+        for joint_id in range(num_joints):
			
 
				+            mu_x = int(joints[joint_id][0] + 0.5) / feat_stride[0]
			
 
				+            mu_y = int(joints[joint_id][1] + 0.5) / feat_stride[1]
			
 
				+            # Check that any part of the gaussian is in-bounds
			
 
				+            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
			
 
				+            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
			
 
				+            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
			
 
				+                    0] < 0 or br[1] < 0:
			
 
				+                # If not, just return the image as is
			
 
				+                target_weight[joint_id] = 0
			
 
				+                continue
			
 
				+            # # Generate gaussian
			
 
				+            size = 2 * tmp_size + 1
			
 
				+            x = np.arange(0, size, 1, np.float32)
			
 
				+            y = x[:, np.newaxis]
			
 
				+            x0 = y0 = size // 2
			
 
				+            # The gaussian is not normalized, we want the center value to equal 1
			
 
				+            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
			
 
				+
			
 
				+            # Usable gaussian range
			
 
				+            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
			
 
				+            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
			
 
				+            # Image range
			
 
				+            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
			
 
				+            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
			
 
				+
			
 
				+            v = target_weight[joint_id]
			
 
				+            if v > 0.5:
			
 
				+                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
			
 
				+                    0]:g_y[1], g_x[0]:g_x[1]]
			
 
				+        records['target'] = target
			
 
				+        records['target_weight'] = target_weight
			
 
				+        del records['joints'], records['joints_vis']
			
 
				+
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class ToHeatmapsTopDown_DARK(object):
			
 
				+    """to generate the gaussin heatmaps of keypoint for heatmap loss
			
 
				+
			
 
				+    Args:
			
 
				+        hmsize (list): [w, h] output heatmap's size
			
 
				+        sigma (float): the std of gaussin kernel genereted
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the heatmaps used to heatmaploss
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, hmsize, sigma):
			
 
				+        super(ToHeatmapsTopDown_DARK, self).__init__()
			
 
				+        self.hmsize = np.array(hmsize)
			
 
				+        self.sigma = sigma
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        num_joints = joints.shape[0]
			
 
				+        image_size = np.array(
			
 
				+            [records['image'].shape[1], records['image'].shape[0]])
			
 
				+        target_weight = np.ones((num_joints, 1), dtype=np.float32)
			
 
				+        target_weight[:, 0] = joints_vis[:, 0]
			
 
				+        target = np.zeros(
			
 
				+            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
			
 
				+        tmp_size = self.sigma * 3
			
 
				+        feat_stride = image_size / self.hmsize
			
 
				+        for joint_id in range(num_joints):
			
 
				+            mu_x = joints[joint_id][0] / feat_stride[0]
			
 
				+            mu_y = joints[joint_id][1] / feat_stride[1]
			
 
				+            # Check that any part of the gaussian is in-bounds
			
 
				+            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
			
 
				+            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
			
 
				+            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
			
 
				+                    0] < 0 or br[1] < 0:
			
 
				+                # If not, just return the image as is
			
 
				+                target_weight[joint_id] = 0
			
 
				+                continue
			
 
				+
			
 
				+            x = np.arange(0, self.hmsize[0], 1, np.float32)
			
 
				+            y = np.arange(0, self.hmsize[1], 1, np.float32)
			
 
				+            y = y[:, np.newaxis]
			
 
				+
			
 
				+            v = target_weight[joint_id]
			
 
				+            if v > 0.5:
			
 
				+                target[joint_id] = np.exp(-(
			
 
				+                    (x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
			
 
				+        records['target'] = target
			
 
				+        records['target_weight'] = target_weight
			
 
				+        del records['joints'], records['joints_vis']
			
 
				+
			
 
				+        return records
			
 
				+
			
 
				+
			
 
				+@register_keypointop
			
 
				+class ToHeatmapsTopDown_UDP(object):
			
 
				+    """This code is based on:
			
 
				+        https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
			
 
				+
			
 
				+        to generate the gaussian heatmaps of keypoint for heatmap loss.
			
 
				+        ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
			
 
				+        for Human Pose Estimation (CVPR 2020).
			
 
				+
			
 
				+    Args:
			
 
				+        hmsize (list): [w, h] output heatmap's size
			
 
				+        sigma (float): the std of gaussin kernel genereted
			
 
				+        records(dict): the dict contained the image and coords
			
 
				+
			
 
				+    Returns:
			
 
				+        records (dict): contain the heatmaps used to heatmaploss
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, hmsize, sigma):
			
 
				+        super(ToHeatmapsTopDown_UDP, self).__init__()
			
 
				+        self.hmsize = np.array(hmsize)
			
 
				+        self.sigma = sigma
			
 
				+
			
 
				+    def __call__(self, records):
			
 
				+        joints = records['joints']
			
 
				+        joints_vis = records['joints_vis']
			
 
				+        num_joints = joints.shape[0]
			
 
				+        image_size = np.array(
			
 
				+            [records['image'].shape[1], records['image'].shape[0]])
			
 
				+        target_weight = np.ones((num_joints, 1), dtype=np.float32)
			
 
				+        target_weight[:, 0] = joints_vis[:, 0]
			
 
				+        target = np.zeros(
			
 
				+            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
			
 
				+        tmp_size = self.sigma * 3
			
 
				+        size = 2 * tmp_size + 1
			
 
				+        x = np.arange(0, size, 1, np.float32)
			
 
				+        y = x[:, None]
			
 
				+        feat_stride = (image_size - 1.0) / (self.hmsize - 1.0)
			
 
				+        for joint_id in range(num_joints):
			
 
				+            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
			
 
				+            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
			
 
				+            # Check that any part of the gaussian is in-bounds
			
 
				+            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
			
 
				+            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
			
 
				+            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
			
 
				+                    0] < 0 or br[1] < 0:
			
 
				+                # If not, just return the image as is
			
 
				+                target_weight[joint_id] = 0
			
 
				+                continue
			
 
				+
			
 
				+            mu_x_ac = joints[joint_id][0] / feat_stride[0]
			
 
				+            mu_y_ac = joints[joint_id][1] / feat_stride[1]
			
 
				+            x0 = y0 = size // 2
			
 
				+            x0 += mu_x_ac - mu_x
			
 
				+            y0 += mu_y_ac - mu_y
			
 
				+            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
			
 
				+            # Usable gaussian range
			
 
				+            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
			
 
				+            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
			
 
				+            # Image range
			
 
				+            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
			
 
				+            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
			
 
				+
			
 
				+            v = target_weight[joint_id]
			
 
				+            if v > 0.5:
			
 
				+                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
			
 
				+                    0]:g_y[1], g_x[0]:g_x[1]]
			
 
				+        records['target'] = target
			
 
				+        records['target_weight'] = target_weight
			
 
				+        del records['joints'], records['joints_vis']
			
 
				+
			
 
				+        return records
			
--- a/paddlers/models/ppdet/data/transform/mot_operators.py
+++ b/paddlers/models/ppdet/data/transform/mot_operators.py
@@ -0,0 +1,628 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+from numbers import Integral
			
 
				+
			
 
				+import cv2
			
 
				+import copy
			
 
				+import numpy as np
			
 
				+import random
			
 
				+import math
			
 
				+
			
 
				+from .operators import BaseOperator, register_op
			
 
				+from .batch_operators import Gt2TTFTarget
			
 
				+from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+from .op_helper import gaussian_radius
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = [
			
 
				+    'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
			
 
				+    'Gt2JDETargetMax', 'Gt2FairMOTTarget'
			
 
				+]
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RGBReverse(BaseOperator):
			
 
				+    """RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(RGBReverse, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        im = sample['image']
			
 
				+        sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class LetterBoxResize(BaseOperator):
			
 
				+    def __init__(self, target_size):
			
 
				+        """
			
 
				+        Resize image to target size, convert normalized xywh to pixel xyxy
			
 
				+        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
			
 
				+        Args:
			
 
				+            target_size (int|list): image target size.
			
 
				+        """
			
 
				+        super(LetterBoxResize, self).__init__()
			
 
				+        if not isinstance(target_size, (Integral, Sequence)):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        if isinstance(target_size, Integral):
			
 
				+            target_size = [target_size, target_size]
			
 
				+        self.target_size = target_size
			
 
				+
			
 
				+    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
			
 
				+        # letterbox: resize a rectangular image to a padded rectangular
			
 
				+        shape = img.shape[:2]  # [height, width]
			
 
				+        ratio_h = float(height) / shape[0]
			
 
				+        ratio_w = float(width) / shape[1]
			
 
				+        ratio = min(ratio_h, ratio_w)
			
 
				+        new_shape = (round(shape[1] * ratio),
			
 
				+                     round(shape[0] * ratio))  # [width, height]
			
 
				+        padw = (width - new_shape[0]) / 2
			
 
				+        padh = (height - new_shape[1]) / 2
			
 
				+        top, bottom = round(padh - 0.1), round(padh + 0.1)
			
 
				+        left, right = round(padw - 0.1), round(padw + 0.1)
			
 
				+
			
 
				+        img = cv2.resize(
			
 
				+            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
			
 
				+        img = cv2.copyMakeBorder(
			
 
				+            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
			
 
				+            value=color)  # padded rectangular
			
 
				+        return img, ratio, padw, padh
			
 
				+
			
 
				+    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
			
 
				+        bboxes = bbox0.copy()
			
 
				+        bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
			
 
				+        bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
			
 
				+        bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
			
 
				+        bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
			
 
				+        return bboxes
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Resize the image numpy.
			
 
				+        """
			
 
				+        im = sample['image']
			
 
				+        h, w = sample['im_shape']
			
 
				+        if not isinstance(im, np.ndarray):
			
 
				+            raise TypeError("{}: image type is not numpy.".format(self))
			
 
				+        if len(im.shape) != 3:
			
 
				+            from PIL import UnidentifiedImageError
			
 
				+            raise UnidentifiedImageError(
			
 
				+                '{}: image is not 3-dimensional.'.format(self))
			
 
				+
			
 
				+        # apply image
			
 
				+        height, width = self.target_size
			
 
				+        img, ratio, padw, padh = self.apply_image(
			
 
				+            im, height=height, width=width)
			
 
				+
			
 
				+        sample['image'] = img
			
 
				+        new_shape = (round(h * ratio), round(w * ratio))
			
 
				+        sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
			
 
				+        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
			
 
				+
			
 
				+        # apply bbox
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
			
 
				+                                                padw, padh)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class MOTRandomAffine(BaseOperator):
			
 
				+    """
			
 
				+    Affine transform to image and coords to achieve the rotate, scale and
			
 
				+    shift effect for training image.
			
 
				+
			
 
				+    Args:
			
 
				+        degrees (list[2]): the rotate range to apply, transform range is [min, max]
			
 
				+        translate (list[2]): the translate range to apply, transform range is [min, max]
			
 
				+        scale (list[2]): the scale range to apply, transform range is [min, max]
			
 
				+        shear (list[2]): the shear range to apply, transform range is [min, max]
			
 
				+        borderValue (list[3]): value used in case of a constant border when appling
			
 
				+            the perspective transformation
			
 
				+        reject_outside (bool): reject warped bounding bboxes outside of image
			
 
				+
			
 
				+    Returns:
			
 
				+        records(dict): contain the image and coords after tranformed
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 degrees=(-5, 5),
			
 
				+                 translate=(0.10, 0.10),
			
 
				+                 scale=(0.50, 1.20),
			
 
				+                 shear=(-2, 2),
			
 
				+                 borderValue=(127.5, 127.5, 127.5),
			
 
				+                 reject_outside=True):
			
 
				+        super(MOTRandomAffine, self).__init__()
			
 
				+        self.degrees = degrees
			
 
				+        self.translate = translate
			
 
				+        self.scale = scale
			
 
				+        self.shear = shear
			
 
				+        self.borderValue = borderValue
			
 
				+        self.reject_outside = reject_outside
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
			
 
				+        border = 0  # width of added border (optional)
			
 
				+
			
 
				+        img = sample['image']
			
 
				+        height, width = img.shape[0], img.shape[1]
			
 
				+
			
 
				+        # Rotation and Scale
			
 
				+        R = np.eye(3)
			
 
				+        a = random.random() * (self.degrees[1] - self.degrees[0]
			
 
				+                               ) + self.degrees[0]
			
 
				+        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
			
 
				+        R[:2] = cv2.getRotationMatrix2D(
			
 
				+            angle=a, center=(width / 2, height / 2), scale=s)
			
 
				+
			
 
				+        # Translation
			
 
				+        T = np.eye(3)
			
 
				+        T[0, 2] = (
			
 
				+            random.random() * 2 - 1
			
 
				+        ) * self.translate[0] * height + border  # x translation (pixels)
			
 
				+        T[1, 2] = (
			
 
				+            random.random() * 2 - 1
			
 
				+        ) * self.translate[1] * width + border  # y translation (pixels)
			
 
				+
			
 
				+        # Shear
			
 
				+        S = np.eye(3)
			
 
				+        S[0, 1] = math.tan((random.random() *
			
 
				+                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
			
 
				+                           math.pi / 180)  # x shear (deg)
			
 
				+        S[1, 0] = math.tan((random.random() *
			
 
				+                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
			
 
				+                           math.pi / 180)  # y shear (deg)
			
 
				+
			
 
				+        M = S @T @R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
			
 
				+        imw = cv2.warpPerspective(
			
 
				+            img,
			
 
				+            M,
			
 
				+            dsize=(width, height),
			
 
				+            flags=cv2.INTER_LINEAR,
			
 
				+            borderValue=self.borderValue)  # BGR order borderValue
			
 
				+
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            targets = sample['gt_bbox']
			
 
				+            n = targets.shape[0]
			
 
				+            points = targets.copy()
			
 
				+            area0 = (points[:, 2] - points[:, 0]) * (
			
 
				+                points[:, 3] - points[:, 1])
			
 
				+
			
 
				+            # warp points
			
 
				+            xy = np.ones((n * 4, 3))
			
 
				+            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
			
 
				+                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
			
 
				+            xy = (xy @M.T)[:, :2].reshape(n, 8)
			
 
				+
			
 
				+            # create new boxes
			
 
				+            x = xy[:, [0, 2, 4, 6]]
			
 
				+            y = xy[:, [1, 3, 5, 7]]
			
 
				+            xy = np.concatenate(
			
 
				+                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
			
 
				+
			
 
				+            # apply angle-based reduction
			
 
				+            radians = a * math.pi / 180
			
 
				+            reduction = max(abs(math.sin(radians)),
			
 
				+                            abs(math.cos(radians)))**0.5
			
 
				+            x = (xy[:, 2] + xy[:, 0]) / 2
			
 
				+            y = (xy[:, 3] + xy[:, 1]) / 2
			
 
				+            w = (xy[:, 2] - xy[:, 0]) * reduction
			
 
				+            h = (xy[:, 3] - xy[:, 1]) * reduction
			
 
				+            xy = np.concatenate(
			
 
				+                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
			
 
				+
			
 
				+            # reject warped points outside of image
			
 
				+            if self.reject_outside:
			
 
				+                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
			
 
				+                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
			
 
				+                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
			
 
				+                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
			
 
				+            w = xy[:, 2] - xy[:, 0]
			
 
				+            h = xy[:, 3] - xy[:, 1]
			
 
				+            area = w * h
			
 
				+            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
			
 
				+            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
			
 
				+
			
 
				+            if sum(i) > 0:
			
 
				+                sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
			
 
				+                sample['gt_class'] = sample['gt_class'][i]
			
 
				+                if 'difficult' in sample:
			
 
				+                    sample['difficult'] = sample['difficult'][i]
			
 
				+                if 'gt_ide' in sample:
			
 
				+                    sample['gt_ide'] = sample['gt_ide'][i]
			
 
				+                if 'is_crowd' in sample:
			
 
				+                    sample['is_crowd'] = sample['is_crowd'][i]
			
 
				+                sample['image'] = imw
			
 
				+                return sample
			
 
				+            else:
			
 
				+                return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2JDETargetThres(BaseOperator):
			
 
				+    __shared__ = ['num_classes']
			
 
				+    """
			
 
				+    Generate JDE targets by groud truth data when training
			
 
				+    Args:
			
 
				+        anchors (list): anchors of JDE model
			
 
				+        anchor_masks (list): anchor_masks of JDE model
			
 
				+        downsample_ratios (list): downsample ratios of JDE model
			
 
				+        ide_thresh (float): thresh of identity, higher is groud truth
			
 
				+        fg_thresh (float): thresh of foreground, higher is foreground
			
 
				+        bg_thresh (float): thresh of background, lower is background
			
 
				+        num_classes (int): number of classes
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 anchors,
			
 
				+                 anchor_masks,
			
 
				+                 downsample_ratios,
			
 
				+                 ide_thresh=0.5,
			
 
				+                 fg_thresh=0.5,
			
 
				+                 bg_thresh=0.4,
			
 
				+                 num_classes=1):
			
 
				+        super(Gt2JDETargetThres, self).__init__()
			
 
				+        self.anchors = anchors
			
 
				+        self.anchor_masks = anchor_masks
			
 
				+        self.downsample_ratios = downsample_ratios
			
 
				+        self.ide_thresh = ide_thresh
			
 
				+        self.fg_thresh = fg_thresh
			
 
				+        self.bg_thresh = bg_thresh
			
 
				+        self.num_classes = num_classes
			
 
				+
			
 
				+    def generate_anchor(self, nGh, nGw, anchor_hw):
			
 
				+        nA = len(anchor_hw)
			
 
				+        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
			
 
				+
			
 
				+        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
			
 
				+        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]
			
 
				+
			
 
				+        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
			
 
				+        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
			
 
				+        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
			
 
				+
			
 
				+        anchor_mesh = np.concatenate(
			
 
				+            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
			
 
				+        return anchor_mesh
			
 
				+
			
 
				+    def encode_delta(self, gt_box_list, fg_anchor_list):
			
 
				+        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
			
 
				+                        fg_anchor_list[:, 2], fg_anchor_list[:,3]
			
 
				+        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
			
 
				+                        gt_box_list[:, 2], gt_box_list[:, 3]
			
 
				+        dx = (gx - px) / pw
			
 
				+        dy = (gy - py) / ph
			
 
				+        dw = np.log(gw / pw)
			
 
				+        dh = np.log(gh / ph)
			
 
				+        return np.stack([dx, dy, dw, dh], axis=1)
			
 
				+
			
 
				+    def pad_box(self, sample, num_max):
			
 
				+        assert 'gt_bbox' in sample
			
 
				+        bbox = sample['gt_bbox']
			
 
				+        gt_num = len(bbox)
			
 
				+        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
			
 
				+        if gt_num > 0:
			
 
				+            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
			
 
				+        sample['gt_bbox'] = pad_bbox
			
 
				+        if 'gt_score' in sample:
			
 
				+            pad_score = np.zeros((num_max, ), dtype=np.float32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
			
 
				+            sample['gt_score'] = pad_score
			
 
				+        if 'difficult' in sample:
			
 
				+            pad_diff = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
			
 
				+            sample['difficult'] = pad_diff
			
 
				+        if 'is_crowd' in sample:
			
 
				+            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
			
 
				+            sample['is_crowd'] = pad_crowd
			
 
				+        if 'gt_ide' in sample:
			
 
				+            pad_ide = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
			
 
				+            sample['gt_ide'] = pad_ide
			
 
				+        return sample
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        assert len(self.anchor_masks) == len(self.downsample_ratios), \
			
 
				+            "anchor_masks', and 'downsample_ratios' should have same length."
			
 
				+        h, w = samples[0]['image'].shape[1:3]
			
 
				+
			
 
				+        num_max = 0
			
 
				+        for sample in samples:
			
 
				+            num_max = max(num_max, len(sample['gt_bbox']))
			
 
				+
			
 
				+        for sample in samples:
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            gt_ide = sample['gt_ide']
			
 
				+            for i, (anchor_hw, downsample_ratio
			
 
				+                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
			
 
				+                anchor_hw = np.array(
			
 
				+                    anchor_hw, dtype=np.float32) / downsample_ratio
			
 
				+                nA = len(anchor_hw)
			
 
				+                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
			
 
				+                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
			
 
				+                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
			
 
				+                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
			
 
				+
			
 
				+                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
			
 
				+                gxy[:, 0] = gxy[:, 0] * nGw
			
 
				+                gxy[:, 1] = gxy[:, 1] * nGh
			
 
				+                gwh[:, 0] = gwh[:, 0] * nGw
			
 
				+                gwh[:, 1] = gwh[:, 1] * nGh
			
 
				+                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
			
 
				+                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
			
 
				+                tboxes = np.concatenate([gxy, gwh], axis=1)
			
 
				+
			
 
				+                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
			
 
				+
			
 
				+                anchor_list = np.transpose(anchor_mesh,
			
 
				+                                           (0, 2, 3, 1)).reshape(-1, 4)
			
 
				+                iou_pdist = bbox_iou_np_expand(
			
 
				+                    anchor_list, tboxes, x1y1x2y2=False)
			
 
				+
			
 
				+                iou_max = np.max(iou_pdist, axis=1)
			
 
				+                max_gt_index = np.argmax(iou_pdist, axis=1)
			
 
				+
			
 
				+                iou_map = iou_max.reshape(nA, nGh, nGw)
			
 
				+                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
			
 
				+
			
 
				+                id_index = iou_map > self.ide_thresh
			
 
				+                fg_index = iou_map > self.fg_thresh
			
 
				+                bg_index = iou_map < self.bg_thresh
			
 
				+                ign_index = (iou_map < self.fg_thresh) * (
			
 
				+                    iou_map > self.bg_thresh)
			
 
				+                tconf[fg_index] = 1
			
 
				+                tconf[bg_index] = 0
			
 
				+                tconf[ign_index] = -1
			
 
				+
			
 
				+                gt_index = gt_index_map[fg_index]
			
 
				+                gt_box_list = tboxes[gt_index]
			
 
				+                gt_id_list = gt_ide[gt_index_map[id_index]]
			
 
				+
			
 
				+                if np.sum(fg_index) > 0:
			
 
				+                    tid[id_index] = gt_id_list
			
 
				+
			
 
				+                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
			
 
				+                                                         4)[fg_index]
			
 
				+                    delta_target = self.encode_delta(gt_box_list,
			
 
				+                                                     fg_anchor_list)
			
 
				+                    tbox[fg_index] = delta_target
			
 
				+
			
 
				+                sample['tbox{}'.format(i)] = tbox
			
 
				+                sample['tconf{}'.format(i)] = tconf
			
 
				+                sample['tide{}'.format(i)] = tid
			
 
				+            sample.pop('gt_class')
			
 
				+            sample = self.pad_box(sample, num_max)
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Gt2JDETargetMax(BaseOperator):
			
 
				+    __shared__ = ['num_classes']
			
 
				+    """
			
 
				+    Generate JDE targets by groud truth data when evaluating
			
 
				+    Args:
			
 
				+        anchors (list): anchors of JDE model
			
 
				+        anchor_masks (list): anchor_masks of JDE model
			
 
				+        downsample_ratios (list): downsample ratios of JDE model
			
 
				+        max_iou_thresh (float): iou thresh for high quality anchor
			
 
				+        num_classes (int): number of classes
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 anchors,
			
 
				+                 anchor_masks,
			
 
				+                 downsample_ratios,
			
 
				+                 max_iou_thresh=0.60,
			
 
				+                 num_classes=1):
			
 
				+        super(Gt2JDETargetMax, self).__init__()
			
 
				+        self.anchors = anchors
			
 
				+        self.anchor_masks = anchor_masks
			
 
				+        self.downsample_ratios = downsample_ratios
			
 
				+        self.max_iou_thresh = max_iou_thresh
			
 
				+        self.num_classes = num_classes
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        assert len(self.anchor_masks) == len(self.downsample_ratios), \
			
 
				+            "anchor_masks', and 'downsample_ratios' should have same length."
			
 
				+        h, w = samples[0]['image'].shape[1:3]
			
 
				+        for sample in samples:
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            gt_ide = sample['gt_ide']
			
 
				+            for i, (anchor_hw, downsample_ratio
			
 
				+                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
			
 
				+                anchor_hw = np.array(
			
 
				+                    anchor_hw, dtype=np.float32) / downsample_ratio
			
 
				+                nA = len(anchor_hw)
			
 
				+                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
			
 
				+                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
			
 
				+                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
			
 
				+                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
			
 
				+
			
 
				+                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
			
 
				+                gxy[:, 0] = gxy[:, 0] * nGw
			
 
				+                gxy[:, 1] = gxy[:, 1] * nGh
			
 
				+                gwh[:, 0] = gwh[:, 0] * nGw
			
 
				+                gwh[:, 1] = gwh[:, 1] * nGh
			
 
				+                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
			
 
				+                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
			
 
				+
			
 
				+                # iou of targets-anchors (using wh only)
			
 
				+                box1 = gwh
			
 
				+                box2 = anchor_hw[:, None, :]
			
 
				+                inter_area = np.minimum(box1, box2).prod(2)
			
 
				+                iou = inter_area / (
			
 
				+                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
			
 
				+
			
 
				+                # Select best iou_pred and anchor
			
 
				+                iou_best = iou.max(0)  # best anchor [0-2] for each target
			
 
				+                a = np.argmax(iou, axis=0)
			
 
				+
			
 
				+                # Select best unique target-anchor combinations
			
 
				+                iou_order = np.argsort(-iou_best)  # best to worst
			
 
				+
			
 
				+                # Unique anchor selection
			
 
				+                u = np.stack((gi, gj, a), 0)[:, iou_order]
			
 
				+                _, first_unique = np.unique(u, axis=1, return_index=True)
			
 
				+                mask = iou_order[first_unique]
			
 
				+                # best anchor must share significant commonality (iou) with target
			
 
				+                # TODO: examine arbitrary threshold
			
 
				+                idx = mask[iou_best[mask] > self.max_iou_thresh]
			
 
				+
			
 
				+                if len(idx) > 0:
			
 
				+                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
			
 
				+                    t_box = gt_bbox[idx]
			
 
				+                    t_id = gt_ide[idx]
			
 
				+                    if len(t_box.shape) == 1:
			
 
				+                        t_box = t_box.reshape(1, 4)
			
 
				+
			
 
				+                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
			
 
				+                    gxy[:, 0] = gxy[:, 0] * nGw
			
 
				+                    gxy[:, 1] = gxy[:, 1] * nGh
			
 
				+                    gwh[:, 0] = gwh[:, 0] * nGw
			
 
				+                    gwh[:, 1] = gwh[:, 1] * nGh
			
 
				+
			
 
				+                    # XY coordinates
			
 
				+                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
			
 
				+                    # Width and height in yolo method
			
 
				+                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(
			
 
				+                        gwh / anchor_hw[a_i])
			
 
				+                    tconf[a_i, gj_i, gi_i] = 1
			
 
				+                    tid[a_i, gj_i, gi_i] = t_id
			
 
				+
			
 
				+                sample['tbox{}'.format(i)] = tbox
			
 
				+                sample['tconf{}'.format(i)] = tconf
			
 
				+                sample['tide{}'.format(i)] = tid
			
 
				+
			
 
				+
			
 
				+class Gt2FairMOTTarget(Gt2TTFTarget):
			
 
				+    __shared__ = ['num_classes']
			
 
				+    """
			
 
				+    Generate FairMOT targets by ground truth data.
			
 
				+    Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
			
 
				+        1. the gaussian kernal radius to generate a heatmap.
			
 
				+        2. the targets needed during traing.
			
 
				+
			
 
				+    Args:
			
 
				+        num_classes(int): the number of classes.
			
 
				+        down_ratio(int): the down ratio from images to heatmap, 4 by default.
			
 
				+        max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
			
 
				+        super(Gt2TTFTarget, self).__init__()
			
 
				+        self.down_ratio = down_ratio
			
 
				+        self.num_classes = num_classes
			
 
				+        self.max_objs = max_objs
			
 
				+
			
 
				+    def __call__(self, samples, context=None):
			
 
				+        for b_id, sample in enumerate(samples):
			
 
				+            output_h = sample['image'].shape[1] // self.down_ratio
			
 
				+            output_w = sample['image'].shape[2] // self.down_ratio
			
 
				+
			
 
				+            heatmap = np.zeros(
			
 
				+                (self.num_classes, output_h, output_w), dtype='float32')
			
 
				+            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
			
 
				+            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
			
 
				+            index = np.zeros((self.max_objs, ), dtype=np.int64)
			
 
				+            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
			
 
				+            reid = np.zeros((self.max_objs, ), dtype=np.int64)
			
 
				+            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
			
 
				+            if self.num_classes > 1:
			
 
				+                # each category corresponds to a set of track ids
			
 
				+                cls_tr_ids = np.zeros(
			
 
				+                    (self.num_classes, output_h, output_w), dtype=np.int64)
			
 
				+                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)
			
 
				+
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            gt_class = sample['gt_class']
			
 
				+            gt_ide = sample['gt_ide']
			
 
				+
			
 
				+            for k in range(len(gt_bbox)):
			
 
				+                cls_id = gt_class[k][0]
			
 
				+                bbox = gt_bbox[k]
			
 
				+                ide = gt_ide[k][0]
			
 
				+                bbox[[0, 2]] = bbox[[0, 2]] * output_w
			
 
				+                bbox[[1, 3]] = bbox[[1, 3]] * output_h
			
 
				+                bbox_amodal = copy.deepcopy(bbox)
			
 
				+                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
			
 
				+                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
			
 
				+                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
			
 
				+                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
			
 
				+                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
			
 
				+                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
			
 
				+                h = bbox[3]
			
 
				+                w = bbox[2]
			
 
				+
			
 
				+                bbox_xy = copy.deepcopy(bbox)
			
 
				+                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
			
 
				+                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
			
 
				+                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
			
 
				+                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
			
 
				+
			
 
				+                if h > 0 and w > 0:
			
 
				+                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
			
 
				+                    radius = max(0, int(radius))
			
 
				+                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
			
 
				+                    ct_int = ct.astype(np.int32)
			
 
				+                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int,
			
 
				+                                                radius, radius)
			
 
				+                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
			
 
				+                            bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
			
 
				+
			
 
				+                    index[k] = ct_int[1] * output_w + ct_int[0]
			
 
				+                    center_offset[k] = ct - ct_int
			
 
				+                    index_mask[k] = 1
			
 
				+                    reid[k] = ide
			
 
				+                    bbox_xys[k] = bbox_xy
			
 
				+                    if self.num_classes > 1:
			
 
				+                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
			
 
				+                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
			
 
				+                        # track id start from 0
			
 
				+
			
 
				+            sample['heatmap'] = heatmap
			
 
				+            sample['index'] = index
			
 
				+            sample['offset'] = center_offset
			
 
				+            sample['size'] = bbox_size
			
 
				+            sample['index_mask'] = index_mask
			
 
				+            sample['reid'] = reid
			
 
				+            if self.num_classes > 1:
			
 
				+                sample['cls_id_map'] = cls_id_map
			
 
				+                sample['cls_tr_ids'] = cls_tr_ids
			
 
				+            sample['bbox_xys'] = bbox_xys
			
 
				+            sample.pop('is_crowd', None)
			
 
				+            sample.pop('difficult', None)
			
 
				+            sample.pop('gt_class', None)
			
 
				+            sample.pop('gt_bbox', None)
			
 
				+            sample.pop('gt_score', None)
			
 
				+            sample.pop('gt_ide', None)
			
 
				+        return samples
			
--- a/paddlers/models/ppdet/data/transform/op_helper.py
+++ b/paddlers/models/ppdet/data/transform/op_helper.py
@@ -0,0 +1,498 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+# this file contains helper methods for BBOX processing
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import numpy as np
			
 
				+import random
			
 
				+import math
			
 
				+import cv2
			
 
				+
			
 
				+
			
 
				+def meet_emit_constraint(src_bbox, sample_bbox):
			
 
				+    center_x = (src_bbox[2] + src_bbox[0]) / 2
			
 
				+    center_y = (src_bbox[3] + src_bbox[1]) / 2
			
 
				+    if center_x >= sample_bbox[0] and \
			
 
				+            center_x <= sample_bbox[2] and \
			
 
				+            center_y >= sample_bbox[1] and \
			
 
				+            center_y <= sample_bbox[3]:
			
 
				+        return True
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def clip_bbox(src_bbox):
			
 
				+    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
			
 
				+    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
			
 
				+    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
			
 
				+    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
			
 
				+    return src_bbox
			
 
				+
			
 
				+
			
 
				+def bbox_area(src_bbox):
			
 
				+    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
			
 
				+        return 0.
			
 
				+    else:
			
 
				+        width = src_bbox[2] - src_bbox[0]
			
 
				+        height = src_bbox[3] - src_bbox[1]
			
 
				+        return width * height
			
 
				+
			
 
				+
			
 
				+def is_overlap(object_bbox, sample_bbox):
			
 
				+    if object_bbox[0] >= sample_bbox[2] or \
			
 
				+       object_bbox[2] <= sample_bbox[0] or \
			
 
				+       object_bbox[1] >= sample_bbox[3] or \
			
 
				+       object_bbox[3] <= sample_bbox[1]:
			
 
				+        return False
			
 
				+    else:
			
 
				+        return True
			
 
				+
			
 
				+
			
 
				+def filter_and_process(sample_bbox,
			
 
				+                       bboxes,
			
 
				+                       labels,
			
 
				+                       scores=None,
			
 
				+                       keypoints=None):
			
 
				+    new_bboxes = []
			
 
				+    new_labels = []
			
 
				+    new_scores = []
			
 
				+    new_keypoints = []
			
 
				+    new_kp_ignore = []
			
 
				+    for i in range(len(bboxes)):
			
 
				+        new_bbox = [0, 0, 0, 0]
			
 
				+        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
			
 
				+        if not meet_emit_constraint(obj_bbox, sample_bbox):
			
 
				+            continue
			
 
				+        if not is_overlap(obj_bbox, sample_bbox):
			
 
				+            continue
			
 
				+        sample_width = sample_bbox[2] - sample_bbox[0]
			
 
				+        sample_height = sample_bbox[3] - sample_bbox[1]
			
 
				+        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
			
 
				+        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
			
 
				+        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
			
 
				+        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
			
 
				+        new_bbox = clip_bbox(new_bbox)
			
 
				+        if bbox_area(new_bbox) > 0:
			
 
				+            new_bboxes.append(new_bbox)
			
 
				+            new_labels.append([labels[i][0]])
			
 
				+            if scores is not None:
			
 
				+                new_scores.append([scores[i][0]])
			
 
				+            if keypoints is not None:
			
 
				+                sample_keypoint = keypoints[0][i]
			
 
				+                for j in range(len(sample_keypoint)):
			
 
				+                    kp_len = sample_height if j % 2 else sample_width
			
 
				+                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
			
 
				+                    sample_keypoint[j] = (
			
 
				+                        sample_keypoint[j] - sample_coord) / kp_len
			
 
				+                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
			
 
				+                new_keypoints.append(sample_keypoint)
			
 
				+                new_kp_ignore.append(keypoints[1][i])
			
 
				+
			
 
				+    bboxes = np.array(new_bboxes)
			
 
				+    labels = np.array(new_labels)
			
 
				+    scores = np.array(new_scores)
			
 
				+    if keypoints is not None:
			
 
				+        keypoints = np.array(new_keypoints)
			
 
				+        new_kp_ignore = np.array(new_kp_ignore)
			
 
				+        return bboxes, labels, scores, (keypoints, new_kp_ignore)
			
 
				+    return bboxes, labels, scores
			
 
				+
			
 
				+
			
 
				+def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
			
 
				+    new_bboxes = []
			
 
				+    new_labels = []
			
 
				+    new_scores = []
			
 
				+    for i, bbox in enumerate(bboxes):
			
 
				+        w = float((bbox[2] - bbox[0]) * target_size)
			
 
				+        h = float((bbox[3] - bbox[1]) * target_size)
			
 
				+        if w * h < float(min_size * min_size):
			
 
				+            continue
			
 
				+        else:
			
 
				+            new_bboxes.append(bbox)
			
 
				+            new_labels.append(labels[i])
			
 
				+            if scores is not None and scores.size != 0:
			
 
				+                new_scores.append(scores[i])
			
 
				+    bboxes = np.array(new_bboxes)
			
 
				+    labels = np.array(new_labels)
			
 
				+    scores = np.array(new_scores)
			
 
				+    return bboxes, labels, scores
			
 
				+
			
 
				+
			
 
				+def generate_sample_bbox(sampler):
			
 
				+    scale = np.random.uniform(sampler[2], sampler[3])
			
 
				+    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
			
 
				+    aspect_ratio = max(aspect_ratio, (scale**2.0))
			
 
				+    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
			
 
				+    bbox_width = scale * (aspect_ratio**0.5)
			
 
				+    bbox_height = scale / (aspect_ratio**0.5)
			
 
				+    xmin_bound = 1 - bbox_width
			
 
				+    ymin_bound = 1 - bbox_height
			
 
				+    xmin = np.random.uniform(0, xmin_bound)
			
 
				+    ymin = np.random.uniform(0, ymin_bound)
			
 
				+    xmax = xmin + bbox_width
			
 
				+    ymax = ymin + bbox_height
			
 
				+    sampled_bbox = [xmin, ymin, xmax, ymax]
			
 
				+    return sampled_bbox
			
 
				+
			
 
				+
			
 
				+def generate_sample_bbox_square(sampler, image_width, image_height):
			
 
				+    scale = np.random.uniform(sampler[2], sampler[3])
			
 
				+    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
			
 
				+    aspect_ratio = max(aspect_ratio, (scale**2.0))
			
 
				+    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
			
 
				+    bbox_width = scale * (aspect_ratio**0.5)
			
 
				+    bbox_height = scale / (aspect_ratio**0.5)
			
 
				+    if image_height < image_width:
			
 
				+        bbox_width = bbox_height * image_height / image_width
			
 
				+    else:
			
 
				+        bbox_height = bbox_width * image_width / image_height
			
 
				+    xmin_bound = 1 - bbox_width
			
 
				+    ymin_bound = 1 - bbox_height
			
 
				+    xmin = np.random.uniform(0, xmin_bound)
			
 
				+    ymin = np.random.uniform(0, ymin_bound)
			
 
				+    xmax = xmin + bbox_width
			
 
				+    ymax = ymin + bbox_height
			
 
				+    sampled_bbox = [xmin, ymin, xmax, ymax]
			
 
				+    return sampled_bbox
			
 
				+
			
 
				+
			
 
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    """Sample a square crop window around one randomly chosen box.

    Args:
        bbox_labels: sequence of boxes; entries ``[0:4]`` of the chosen box
            are read as normalized ``xmin, ymin, xmax, ymax`` and scaled by
            the image size.
        image_width: image width in pixels.
        image_height: image height in pixels.
        scale_array: sequence of anchor scales; squared values are compared
            against the chosen box's pixel area.
            # presumably sorted ascending -- TODO confirm against callers
        resize_width: factor applied to the box width when deriving the
            crop size.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` of the crop window, normalized by the
        image size, or the integer ``0`` when ``bbox_labels`` is empty.

    Note:
        Random draws come from both ``np.random`` and the stdlib ``random``
        module, so both generators must be seeded for reproducible output.
    """
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0

    if num_gt != 0:
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]

        # Convert the chosen box to pixel units.
        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0

        # Find the first pair of neighbouring scales whose squares strictly
        # bracket the box area.
        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break

        # Overrides the loop result whenever the area exceeds the square of
        # the second-to-last scale.
        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2

        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            # The draw is already in [0, range_size], so this modulo leaves
            # it unchanged.
            rand_idx_size = rng_rand_size % (range_size + 1)

        if rand_idx_size == range_size:
            # For the top eligible scale the upper bound is additionally
            # capped at twice the box's geometric-mean side length.
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)

        # Side length (pixels) of the square crop window.
        sample_bbox_size = wid * resize_width / scale_choose

        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            # Per axis: if the box fits in the window, place the window so it
            # contains the box; otherwise place the window inside the box.
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)

            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)

        else:
            # Window is at least as large as the longer image side: draw the
            # offsets from [image_size - window, 0].
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size,
                                           0.0)

        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)

        # Figure out top left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)

        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        # No ground-truth boxes: the integer 0 is returned instead of a box.
        return 0
			
 
				+
			
 
				+
			
 
				+def jaccard_overlap(sample_bbox, object_bbox):
			
 
				+    if sample_bbox[0] >= object_bbox[2] or \
			
 
				+        sample_bbox[2] <= object_bbox[0] or \
			
 
				+        sample_bbox[1] >= object_bbox[3] or \
			
 
				+        sample_bbox[3] <= object_bbox[1]:
			
 
				+        return 0
			
 
				+    intersect_xmin = max(sample_bbox[0], object_bbox[0])
			
 
				+    intersect_ymin = max(sample_bbox[1], object_bbox[1])
			
 
				+    intersect_xmax = min(sample_bbox[2], object_bbox[2])
			
 
				+    intersect_ymax = min(sample_bbox[3], object_bbox[3])
			
 
				+    intersect_size = (intersect_xmax - intersect_xmin) * (
			
 
				+        intersect_ymax - intersect_ymin)
			
 
				+    sample_bbox_size = bbox_area(sample_bbox)
			
 
				+    object_bbox_size = bbox_area(object_bbox)
			
 
				+    overlap = intersect_size / (
			
 
				+        sample_bbox_size + object_bbox_size - intersect_size)
			
 
				+    return overlap
			
 
				+
			
 
				+
			
 
				+def intersect_bbox(bbox1, bbox2):
			
 
				+    if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
			
 
				+        bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
			
 
				+        intersection_box = [0.0, 0.0, 0.0, 0.0]
			
 
				+    else:
			
 
				+        intersection_box = [
			
 
				+            max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
			
 
				+            min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
			
 
				+        ]
			
 
				+    return intersection_box
			
 
				+
			
 
				+
			
 
				+def bbox_coverage(bbox1, bbox2):
			
 
				+    inter_box = intersect_bbox(bbox1, bbox2)
			
 
				+    intersect_size = bbox_area(inter_box)
			
 
				+
			
 
				+    if intersect_size > 0:
			
 
				+        bbox1_size = bbox_area(bbox1)
			
 
				+        return intersect_size / bbox1_size
			
 
				+    else:
			
 
				+        return 0.
			
 
				+
			
 
				+
			
 
				+def satisfy_sample_constraint(sampler,
			
 
				+                              sample_bbox,
			
 
				+                              gt_bboxes,
			
 
				+                              satisfy_all=False):
			
 
				+    if sampler[6] == 0 and sampler[7] == 0:
			
 
				+        return True
			
 
				+    satisfied = []
			
 
				+    for i in range(len(gt_bboxes)):
			
 
				+        object_bbox = [
			
 
				+            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
			
 
				+        ]
			
 
				+        overlap = jaccard_overlap(sample_bbox, object_bbox)
			
 
				+        if sampler[6] != 0 and \
			
 
				+                overlap < sampler[6]:
			
 
				+            satisfied.append(False)
			
 
				+            continue
			
 
				+        if sampler[7] != 0 and \
			
 
				+                overlap > sampler[7]:
			
 
				+            satisfied.append(False)
			
 
				+            continue
			
 
				+        satisfied.append(True)
			
 
				+        if not satisfy_all:
			
 
				+            return True
			
 
				+
			
 
				+    if satisfy_all:
			
 
				+        return np.all(satisfied)
			
 
				+    else:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
			
 
				+    if sampler[6] == 0 and sampler[7] == 0:
			
 
				+        has_jaccard_overlap = False
			
 
				+    else:
			
 
				+        has_jaccard_overlap = True
			
 
				+    if sampler[8] == 0 and sampler[9] == 0:
			
 
				+        has_object_coverage = False
			
 
				+    else:
			
 
				+        has_object_coverage = True
			
 
				+
			
 
				+    if not has_jaccard_overlap and not has_object_coverage:
			
 
				+        return True
			
 
				+    found = False
			
 
				+    for i in range(len(gt_bboxes)):
			
 
				+        object_bbox = [
			
 
				+            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
			
 
				+        ]
			
 
				+        if has_jaccard_overlap:
			
 
				+            overlap = jaccard_overlap(sample_bbox, object_bbox)
			
 
				+            if sampler[6] != 0 and \
			
 
				+                    overlap < sampler[6]:
			
 
				+                continue
			
 
				+            if sampler[7] != 0 and \
			
 
				+                    overlap > sampler[7]:
			
 
				+                continue
			
 
				+            found = True
			
 
				+        if has_object_coverage:
			
 
				+            object_coverage = bbox_coverage(object_bbox, sample_bbox)
			
 
				+            if sampler[8] != 0 and \
			
 
				+                    object_coverage < sampler[8]:
			
 
				+                continue
			
 
				+            if sampler[9] != 0 and \
			
 
				+                    object_coverage > sampler[9]:
			
 
				+                continue
			
 
				+            found = True
			
 
				+        if found:
			
 
				+            return True
			
 
				+    return found
			
 
				+
			
 
				+
			
 
def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    """Crop ``sample_bbox`` out of ``img`` with zero padding, then resize.

    Args:
        img: source image array, sliced as ``img[y0:y1, x0:x1]``.
            # assumes an HxWx3 array, since it is copied into a 3-channel
            # buffer -- TODO confirm
        sample_bbox: ``[xmin, ymin, xmax, ymax]``, multiplied by the image
            size to obtain pixel coordinates; may extend past the image.
        image_width: image width in pixels.
        image_height: image height in pixels.
        target_size: side length of the square output.

    Returns:
        The crop resized to ``(target_size, target_size)`` with
        ``cv2.INTER_AREA``; regions outside the image stay zero.
    """
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    # "cross" is the part of the sample window that lies on the image.
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin

    # "roi" is where that part lands inside the output buffer: shifted right
    # or down by the amount the window starts before the image origin.
    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height

    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)

    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)

    # Zero-filled buffer of the full window size (np.zeros default dtype).
    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]

    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)

    return sample_img
			
 
				+
			
 
				+
			
 
				+def is_poly(segm):
			
 
				+    assert isinstance(segm, (list, dict)), \
			
 
				+        "Invalid segm type: {}".format(type(segm))
			
 
				+    return isinstance(segm, list)
			
 
				+
			
 
				+
			
 
				+def gaussian_radius(bbox_size, min_overlap):
			
 
				+    height, width = bbox_size
			
 
				+
			
 
				+    a1 = 1
			
 
				+    b1 = (height + width)
			
 
				+    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
			
 
				+    sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
			
 
				+    radius1 = (b1 + sq1) / (2 * a1)
			
 
				+
			
 
				+    a2 = 4
			
 
				+    b2 = 2 * (height + width)
			
 
				+    c2 = (1 - min_overlap) * width * height
			
 
				+    sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
			
 
				+    radius2 = (b2 + sq2) / 2
			
 
				+
			
 
				+    a3 = 4 * min_overlap
			
 
				+    b3 = -2 * min_overlap * (height + width)
			
 
				+    c3 = (min_overlap - 1) * width * height
			
 
				+    sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
			
 
				+    radius3 = (b3 + sq3) / 2
			
 
				+    return min(radius1, radius2, radius3)
			
 
				+
			
 
				+
			
 
				+def draw_gaussian(heatmap, center, radius, k=1, delte=6):
			
 
				+    diameter = 2 * radius + 1
			
 
				+    sigma = diameter / delte
			
 
				+    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
			
 
				+
			
 
				+    x, y = center
			
 
				+
			
 
				+    height, width = heatmap.shape[0:2]
			
 
				+
			
 
				+    left, right = min(x, radius), min(width - x, radius + 1)
			
 
				+    top, bottom = min(y, radius), min(height - y, radius + 1)
			
 
				+
			
 
				+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
			
 
				+    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
			
 
				+                               radius + right]
			
 
				+    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
			
 
				+
			
 
				+
			
 
				+def gaussian2D(shape, sigma_x=1, sigma_y=1):
			
 
				+    m, n = [(ss - 1.) / 2. for ss in shape]
			
 
				+    y, x = np.ogrid[-m:m + 1, -n:n + 1]
			
 
				+
			
 
				+    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
			
 
				+                                                            sigma_y)))
			
 
				+    h[h < np.finfo(h.dtype).eps * h.max()] = 0
			
 
				+    return h
			
 
				+
			
 
				+
			
 
				+def draw_umich_gaussian(heatmap, center, radius, k=1):
			
 
				+    """
			
 
				+    draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
			
 
				+    """
			
 
				+    diameter = 2 * radius + 1
			
 
				+    gaussian = gaussian2D(
			
 
				+        (diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)
			
 
				+
			
 
				+    x, y = int(center[0]), int(center[1])
			
 
				+
			
 
				+    height, width = heatmap.shape[0:2]
			
 
				+
			
 
				+    left, right = min(x, radius), min(width - x, radius + 1)
			
 
				+    top, bottom = min(y, radius), min(height - y, radius + 1)
			
 
				+
			
 
				+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
			
 
				+    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
			
 
				+                               radius + right]
			
 
				+    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
			
 
				+        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
			
 
				+    return heatmap
			
 
				+
			
 
				+
			
 
				+def get_border(border, size):
			
 
				+    i = 1
			
 
				+    while size - border // i <= border // i:
			
 
				+        i *= 2
			
 
				+    return border // i
			
--- a/paddlers/models/ppdet/data/transform/operators.py
+++ b/paddlers/models/ppdet/data/transform/operators.py
@@ -0,0 +1,3025 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+# function:
			
 
				+#    operators to process sample,
			
 
				+#    eg: decode/resize/crop image
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import print_function
			
 
				+from __future__ import division
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except Exception:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+from numbers import Number, Integral
			
 
				+
			
 
				+import uuid
			
 
				+import random
			
 
				+import math
			
 
				+import numpy as np
			
 
				+import os
			
 
				+import copy
			
 
				+import logging
			
 
				+import cv2
			
 
				+from PIL import Image, ImageDraw
			
 
				+import pickle
			
 
				+import threading
			
 
				+MUTEX = threading.Lock()
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import serializable
			
 
				+from paddlers.models.ppdet.modeling import bbox_utils
			
 
				+from ..reader import Compose
			
 
				+
			
 
				+from .op_helper import (satisfy_sample_constraint, filter_and_process,
			
 
				+                        generate_sample_bbox, clip_bbox, data_anchor_sampling,
			
 
				+                        satisfy_sample_constraint_coverage,
			
 
				+                        crop_image_sampling, generate_sample_bbox_square,
			
 
				+                        bbox_area_sampling, is_poly, get_border)
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+from paddlers.models.ppdet.modeling.keypoint_utils import get_affine_transform, affine_transform
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+registered_ops = []
			
 
				+
			
 
				+
			
 
				+def register_op(cls):
			
 
				+    registered_ops.append(cls.__name__)
			
 
				+    if not hasattr(BaseOperator, cls.__name__):
			
 
				+        setattr(BaseOperator, cls.__name__, cls)
			
 
				+    else:
			
 
				+        raise KeyError("The {} class has been registered.".format(
			
 
				+            cls.__name__))
			
 
				+    return serializable(cls)
			
 
				+
			
 
				+
			
 
				+class BboxError(ValueError):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+class ImageError(ValueError):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				+class BaseOperator(object):
			
 
				+    def __init__(self, name=None):
			
 
				+        if name is None:
			
 
				+            name = self.__class__.__name__
			
 
				+        self._id = name + '_' + str(uuid.uuid4())[-6:]
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Process a sample.
			
 
				+        Args:
			
 
				+            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
			
 
				+            context (dict): info about this sample processing
			
 
				+        Returns:
			
 
				+            result (dict): a processed sample
			
 
				+        """
			
 
				+        return sample
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        """ Process a sample.
			
 
				+        Args:
			
 
				+            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
			
 
				+            context (dict): info about this sample processing
			
 
				+        Returns:
			
 
				+            result (dict): a processed sample
			
 
				+        """
			
 
				+        if isinstance(sample, Sequence):
			
 
				+            for i in range(len(sample)):
			
 
				+                sample[i] = self.apply(sample[i], context)
			
 
				+        else:
			
 
				+            sample = self.apply(sample, context)
			
 
				+        return sample
			
 
				+
			
 
				+    def __str__(self):
			
 
				+        return str(self._id)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Decode(BaseOperator):
			
 
				+    def __init__(self):
			
 
				+        """ Transform the image data to numpy format following the rgb format
			
 
				+        """
			
 
				+        super(Decode, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ load image if 'im_file' field is not empty but 'image' is"""
			
 
				+        if 'image' not in sample:
			
 
				+            with open(sample['im_file'], 'rb') as f:
			
 
				+                sample['image'] = f.read()
			
 
				+            sample.pop('im_file')
			
 
				+
			
 
				+        im = sample['image']
			
 
				+        data = np.frombuffer(im, dtype='uint8')
			
 
				+        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
			
 
				+        if 'keep_ori_im' in sample and sample['keep_ori_im']:
			
 
				+            sample['ori_image'] = im
			
 
				+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			
 
				+
			
 
				+        sample['image'] = im
			
 
				+        if 'h' not in sample:
			
 
				+            sample['h'] = im.shape[0]
			
 
				+        elif sample['h'] != im.shape[0]:
			
 
				+            logger.warning(
			
 
				+                "The actual image height: {} is not equal to the "
			
 
				+                "height: {} in annotation, and update sample['h'] by actual "
			
 
				+                "image height.".format(im.shape[0], sample['h']))
			
 
				+            sample['h'] = im.shape[0]
			
 
				+        if 'w' not in sample:
			
 
				+            sample['w'] = im.shape[1]
			
 
				+        elif sample['w'] != im.shape[1]:
			
 
				+            logger.warning(
			
 
				+                "The actual image width: {} is not equal to the "
			
 
				+                "width: {} in annotation, and update sample['w'] by actual "
			
 
				+                "image width.".format(im.shape[1], sample['w']))
			
 
				+            sample['w'] = im.shape[1]
			
 
				+
			
 
				+        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
			
 
				+        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+def _make_dirs(dirname):
			
 
				+    try:
			
 
				+        from pathlib import Path
			
 
				+    except ImportError:
			
 
				+        from pathlib2 import Path
			
 
				+    Path(dirname).mkdir(exist_ok=True)
			
 
				+
			
 
				+
			
 
@register_op
class DecodeCache(BaseOperator):
    def __init__(self, cache_root=None):
        '''Decode images and optionally cache the decoded arrays on disk.

        Args:
            cache_root: directory for the pickle cache files. Caching is
                disabled when None; otherwise the directory is created via
                ``_make_dirs``.
        '''
        super(DecodeCache, self).__init__()

        self.use_cache = False if cache_root is None else True
        self.cache_root = cache_root

        if cache_root is not None:
            _make_dirs(cache_root)

    def apply(self, sample, context=None):
        '''Load the sample image from cache, or decode it and fill the cache.

        Sets 'image', 'h', 'w', 'im_shape' and 'scale_factor' on the sample
        and removes 'im_file' (which must therefore be present).
        '''

        # Cache hit: load the decoded RGB array directly. Note that
        # 'ori_image' is not populated on this path.
        if self.use_cache and os.path.exists(
                self.cache_path(self.cache_root, sample['im_file'])):
            path = self.cache_path(self.cache_root, sample['im_file'])
            im = self.load(path)

        else:
            if 'image' not in sample:
                with open(sample['im_file'], 'rb') as f:
                    sample['image'] = f.read()

            im = sample['image']
            data = np.frombuffer(im, dtype='uint8')
            im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
            # Keep the BGR decode result before the colour conversion.
            if 'keep_ori_im' in sample and sample['keep_ori_im']:
                sample['ori_image'] = im
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            # Existence is re-checked right before writing the cache file.
            if self.use_cache and not os.path.exists(
                    self.cache_path(self.cache_root, sample['im_file'])):
                path = self.cache_path(self.cache_root, sample['im_file'])
                self.dump(im, path)

        sample['image'] = im
        sample['h'] = im.shape[0]
        sample['w'] = im.shape[1]

        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)

        sample.pop('im_file')

        return sample

    @staticmethod
    def cache_path(dir_oot, im_file):
        # The cache key is only the file's base name, so two images with the
        # same base name in different directories map to the same cache file.
        return os.path.join(dir_oot, os.path.basename(im_file) + '.pkl')

    @staticmethod
    def load(path):
        # NOTE(review): pickle.load can execute arbitrary code; the cache
        # directory should only contain files written by this operator.
        with open(path, 'rb') as f:
            im = pickle.load(f)
        return im

    @staticmethod
    def dump(obj, path):
        # Writes are serialized through the module-level MUTEX. Failures are
        # logged as warnings and not re-raised.
        MUTEX.acquire()
        try:
            with open(path, 'wb') as f:
                pickle.dump(obj, f)

        except Exception as e:
            logger.warning('dump {} occurs exception {}'.format(path, str(e)))

        finally:
            MUTEX.release()
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class SniperDecodeCrop(BaseOperator):
			
 
				+    def __init__(self):
			
 
				+        super(SniperDecodeCrop, self).__init__()
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        if 'image' not in sample:
			
 
				+            with open(sample['im_file'], 'rb') as f:
			
 
				+                sample['image'] = f.read()
			
 
				+            sample.pop('im_file')
			
 
				+
			
 
				+        im = sample['image']
			
 
				+        data = np.frombuffer(im, dtype='uint8')
			
 
				+        im = cv2.imdecode(data,
			
 
				+                          cv2.IMREAD_COLOR)  # BGR mode, but need RGB mode
			
 
				+        if 'keep_ori_im' in sample and sample['keep_ori_im']:
			
 
				+            sample['ori_image'] = im
			
 
				+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
			
 
				+
			
 
				+        chip = sample['chip']
			
 
				+        x1, y1, x2, y2 = [int(xi) for xi in chip]
			
 
				+        im = im[max(y1, 0):min(y2, im.shape[0]), max(x1, 0):min(x2, im.shape[
			
 
				+            1]), :]
			
 
				+
			
 
				+        sample['image'] = im
			
 
				+        h = im.shape[0]
			
 
				+        w = im.shape[1]
			
 
				+        # sample['im_info'] = [h, w, 1.0]
			
 
				+        sample['h'] = h
			
 
				+        sample['w'] = w
			
 
				+
			
 
				+        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
			
 
				+        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Permute(BaseOperator):
			
 
				+    def __init__(self):
			
 
				+        """
			
 
				+        Change the channel to be (C, H, W)
			
 
				+        """
			
 
				+        super(Permute, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        im = sample['image']
			
 
				+        im = im.transpose((2, 0, 1))
			
 
				+        sample['image'] = im
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Lighting(BaseOperator):
			
 
				+    """
			
 
				+    Lighting the image by eigenvalues and eigenvectors
			
 
				+    Args:
			
 
				+        eigval (list): eigenvalues
			
 
				+        eigvec (list): eigenvectors
			
 
				+        alphastd (float): random weight of lighting, 0.1 by default
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, eigval, eigvec, alphastd=0.1):
			
 
				+        super(Lighting, self).__init__()
			
 
				+        self.alphastd = alphastd
			
 
				+        self.eigval = np.array(eigval).astype('float32')
			
 
				+        self.eigvec = np.array(eigvec).astype('float32')
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        alpha = np.random.normal(scale=self.alphastd, size=(3, ))
			
 
				+        sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomErasingImage(BaseOperator):
			
 
				+    def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
			
 
				+        """
			
 
				+        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
			
 
				+        Args:
			
 
				+            prob (float): probability to carry out random erasing
			
 
				+            lower (float): lower limit of the erasing area ratio
			
 
				+            higher (float): upper limit of the erasing area ratio
			
 
				+            aspect_ratio (float): aspect ratio of the erasing region
			
 
				+        """
			
 
				+        super(RandomErasingImage, self).__init__()
			
 
				+        self.prob = prob
			
 
				+        self.lower = lower
			
 
				+        self.higher = higher
			
 
				+        self.aspect_ratio = aspect_ratio
			
 
				+
			
 
				+    def apply(self, sample):
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        im = sample['image']
			
 
				+        if not isinstance(im, np.ndarray):
			
 
				+            raise TypeError("{}: image is not a numpy array.".format(self))
			
 
				+        if len(im.shape) != 3:
			
 
				+            raise ImageError("{}: image is not 3-dimensional.".format(self))
			
 
				+
			
 
				+        for idx in range(gt_bbox.shape[0]):
			
 
				+            if self.prob <= np.random.rand():
			
 
				+                continue
			
 
				+
			
 
				+            x1, y1, x2, y2 = gt_bbox[idx, :]
			
 
				+            w_bbox = x2 - x1
			
 
				+            h_bbox = y2 - y1
			
 
				+            area = w_bbox * h_bbox
			
 
				+
			
 
				+            target_area = random.uniform(self.lower, self.higher) * area
			
 
				+            aspect_ratio = random.uniform(self.aspect_ratio,
			
 
				+                                          1 / self.aspect_ratio)
			
 
				+
			
 
				+            h = int(round(math.sqrt(target_area * aspect_ratio)))
			
 
				+            w = int(round(math.sqrt(target_area / aspect_ratio)))
			
 
				+
			
 
				+            if w < w_bbox and h < h_bbox:
			
 
				+                off_y1 = random.randint(0, int(h_bbox - h))
			
 
				+                off_x1 = random.randint(0, int(w_bbox - w))
			
 
				+                im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
			
 
				+                    x1 + off_x1 + w), :] = 0
			
 
				+        sample['image'] = im
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class NormalizeImage(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 mean=[0.485, 0.456, 0.406],
			
 
				+                 std=[1, 1, 1],
			
 
				+                 is_scale=True):
			
 
				+        """
			
 
				+        Args:
			
 
				+            mean (list): the pixel mean
			
 
				+            std (list): the pixel variance
			
 
				+        """
			
 
				+        super(NormalizeImage, self).__init__()
			
 
				+        self.mean = mean
			
 
				+        self.std = std
			
 
				+        self.is_scale = is_scale
			
 
				+        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
			
 
				+                isinstance(self.is_scale, bool)):
			
 
				+            raise TypeError("{}: input type is invalid.".format(self))
			
 
				+        from functools import reduce
			
 
				+        if reduce(lambda x, y: x * y, self.std) == 0:
			
 
				+            raise ValueError('{}: std is invalid!'.format(self))
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """Normalize the image.
			
 
				+        Operators:
			
 
				+            1.(optional) Scale the image to [0,1]
			
 
				+            2. Each pixel minus mean and is divided by std
			
 
				+        """
			
 
				+        im = sample['image']
			
 
				+        im = im.astype(np.float32, copy=False)
			
 
				+        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
			
 
				+        std = np.array(self.std)[np.newaxis, np.newaxis, :]
			
 
				+
			
 
				+        if self.is_scale:
			
 
				+            im = im / 255.0
			
 
				+
			
 
				+        im -= mean
			
 
				+        im /= std
			
 
				+
			
 
				+        sample['image'] = im
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class GridMask(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 use_h=True,
			
 
				+                 use_w=True,
			
 
				+                 rotate=1,
			
 
				+                 offset=False,
			
 
				+                 ratio=0.5,
			
 
				+                 mode=1,
			
 
				+                 prob=0.7,
			
 
				+                 upper_iter=360000):
			
 
				+        """
			
 
				+        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
			
 
				+        Args:
			
 
				+            use_h (bool): whether to mask vertically
			
 
				+            use_w (boo;): whether to mask horizontally
			
 
				+            rotate (float): angle for the mask to rotate
			
 
				+            offset (float): mask offset
			
 
				+            ratio (float): mask ratio
			
 
				+            mode (int): gridmask mode
			
 
				+            prob (float): max probability to carry out gridmask
			
 
				+            upper_iter (int): suggested to be equal to global max_iter
			
 
				+        """
			
 
				+        super(GridMask, self).__init__()
			
 
				+        self.use_h = use_h
			
 
				+        self.use_w = use_w
			
 
				+        self.rotate = rotate
			
 
				+        self.offset = offset
			
 
				+        self.ratio = ratio
			
 
				+        self.mode = mode
			
 
				+        self.prob = prob
			
 
				+        self.upper_iter = upper_iter
			
 
				+
			
 
				+        from .gridmask_utils import Gridmask
			
 
				+        self.gridmask_op = Gridmask(
			
 
				+            use_h,
			
 
				+            use_w,
			
 
				+            rotate=rotate,
			
 
				+            offset=offset,
			
 
				+            ratio=ratio,
			
 
				+            mode=mode,
			
 
				+            prob=prob,
			
 
				+            upper_iter=upper_iter)
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        sample['image'] = self.gridmask_op(sample['image'],
			
 
				+                                           sample['curr_iter'])
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomDistort(BaseOperator):
			
 
				+    """Random color distortion.
			
 
				+    Args:
			
 
				+        hue (list): hue settings. in [lower, upper, probability] format.
			
 
				+        saturation (list): saturation settings. in [lower, upper, probability] format.
			
 
				+        contrast (list): contrast settings. in [lower, upper, probability] format.
			
 
				+        brightness (list): brightness settings. in [lower, upper, probability] format.
			
 
				+        random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
			
 
				+            order.
			
 
				+        count (int): the number of doing distrot
			
 
				+        random_channel (bool): whether to swap channels randomly
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 hue=[-18, 18, 0.5],
			
 
				+                 saturation=[0.5, 1.5, 0.5],
			
 
				+                 contrast=[0.5, 1.5, 0.5],
			
 
				+                 brightness=[0.5, 1.5, 0.5],
			
 
				+                 random_apply=True,
			
 
				+                 count=4,
			
 
				+                 random_channel=False):
			
 
				+        super(RandomDistort, self).__init__()
			
 
				+        self.hue = hue
			
 
				+        self.saturation = saturation
			
 
				+        self.contrast = contrast
			
 
				+        self.brightness = brightness
			
 
				+        self.random_apply = random_apply
			
 
				+        self.count = count
			
 
				+        self.random_channel = random_channel
			
 
				+
			
 
				+    def apply_hue(self, img):
			
 
				+        low, high, prob = self.hue
			
 
				+        if np.random.uniform(0., 1.) < prob:
			
 
				+            return img
			
 
				+
			
 
				+        img = img.astype(np.float32)
			
 
				+        # it works, but result differ from HSV version
			
 
				+        delta = np.random.uniform(low, high)
			
 
				+        u = np.cos(delta * np.pi)
			
 
				+        w = np.sin(delta * np.pi)
			
 
				+        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
			
 
				+        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
			
 
				+                         [0.211, -0.523, 0.311]])
			
 
				+        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
			
 
				+                          [1.0, -1.107, 1.705]])
			
 
				+        t = np.dot(np.dot(ityiq, bt), tyiq).T
			
 
				+        img = np.dot(img, t)
			
 
				+        return img
			
 
				+
			
 
				+    def apply_saturation(self, img):
			
 
				+        low, high, prob = self.saturation
			
 
				+        if np.random.uniform(0., 1.) < prob:
			
 
				+            return img
			
 
				+        delta = np.random.uniform(low, high)
			
 
				+        img = img.astype(np.float32)
			
 
				+        # it works, but result differ from HSV version
			
 
				+        gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
			
 
				+        gray = gray.sum(axis=2, keepdims=True)
			
 
				+        gray *= (1.0 - delta)
			
 
				+        img *= delta
			
 
				+        img += gray
			
 
				+        return img
			
 
				+
			
 
				+    def apply_contrast(self, img):
			
 
				+        low, high, prob = self.contrast
			
 
				+        if np.random.uniform(0., 1.) < prob:
			
 
				+            return img
			
 
				+        delta = np.random.uniform(low, high)
			
 
				+        img = img.astype(np.float32)
			
 
				+        img *= delta
			
 
				+        return img
			
 
				+
			
 
				+    def apply_brightness(self, img):
			
 
				+        low, high, prob = self.brightness
			
 
				+        if np.random.uniform(0., 1.) < prob:
			
 
				+            return img
			
 
				+        delta = np.random.uniform(low, high)
			
 
				+        img = img.astype(np.float32)
			
 
				+        img += delta
			
 
				+        return img
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        img = sample['image']
			
 
				+        if self.random_apply:
			
 
				+            functions = [
			
 
				+                self.apply_brightness, self.apply_contrast,
			
 
				+                self.apply_saturation, self.apply_hue
			
 
				+            ]
			
 
				+            distortions = np.random.permutation(functions)[:self.count]
			
 
				+            for func in distortions:
			
 
				+                img = func(img)
			
 
				+            sample['image'] = img
			
 
				+            return sample
			
 
				+
			
 
				+        img = self.apply_brightness(img)
			
 
				+        mode = np.random.randint(0, 2)
			
 
				+
			
 
				+        if mode:
			
 
				+            img = self.apply_contrast(img)
			
 
				+
			
 
				+        img = self.apply_saturation(img)
			
 
				+        img = self.apply_hue(img)
			
 
				+
			
 
				+        if not mode:
			
 
				+            img = self.apply_contrast(img)
			
 
				+
			
 
				+        if self.random_channel:
			
 
				+            if np.random.randint(0, 2):
			
 
				+                img = img[..., np.random.permutation(3)]
			
 
				+        sample['image'] = img
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class AutoAugment(BaseOperator):
			
 
				+    def __init__(self, autoaug_type="v1"):
			
 
				+        """
			
 
				+        Args:
			
 
				+            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
			
 
				+        """
			
 
				+        super(AutoAugment, self).__init__()
			
 
				+        self.autoaug_type = autoaug_type
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """
			
 
				+        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
			
 
				+        """
			
 
				+        im = sample['image']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        if not isinstance(im, np.ndarray):
			
 
				+            raise TypeError("{}: image is not a numpy array.".format(self))
			
 
				+        if len(im.shape) != 3:
			
 
				+            raise ImageError("{}: image is not 3-dimensional.".format(self))
			
 
				+        if len(gt_bbox) == 0:
			
 
				+            return sample
			
 
				+
			
 
				+        height, width, _ = im.shape
			
 
				+        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
			
 
				+        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
			
 
				+        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
			
 
				+        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
			
 
				+        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
			
 
				+
			
 
				+        from .autoaugment_utils import distort_image_with_autoaugment
			
 
				+        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
			
 
				+                                                          self.autoaug_type)
			
 
				+
			
 
				+        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
			
 
				+        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
			
 
				+        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
			
 
				+        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
			
 
				+
			
 
				+        sample['image'] = im
			
 
				+        sample['gt_bbox'] = gt_bbox
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomFlip(BaseOperator):
			
 
				+    def __init__(self, prob=0.5):
			
 
				+        """
			
 
				+        Args:
			
 
				+            prob (float): the probability of flipping image
			
 
				+        """
			
 
				+        super(RandomFlip, self).__init__()
			
 
				+        self.prob = prob
			
 
				+        if not (isinstance(self.prob, float)):
			
 
				+            raise TypeError("{}: input type is invalid.".format(self))
			
 
				+
			
 
				+    def apply_segm(self, segms, height, width):
			
 
				+        def _flip_poly(poly, width):
			
 
				+            flipped_poly = np.array(poly)
			
 
				+            flipped_poly[0::2] = width - np.array(poly[0::2])
			
 
				+            return flipped_poly.tolist()
			
 
				+
			
 
				+        def _flip_rle(rle, height, width):
			
 
				+            if 'counts' in rle and type(rle['counts']) == list:
			
 
				+                rle = mask_util.frPyObjects(rle, height, width)
			
 
				+            mask = mask_util.decode(rle)
			
 
				+            mask = mask[:, ::-1]
			
 
				+            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
			
 
				+            return rle
			
 
				+
			
 
				+        flipped_segms = []
			
 
				+        for segm in segms:
			
 
				+            if is_poly(segm):
			
 
				+                # Polygon format
			
 
				+                flipped_segms.append(
			
 
				+                    [_flip_poly(poly, width) for poly in segm])
			
 
				+            else:
			
 
				+                # RLE format
			
 
				+                import pycocotools.mask as mask_util
			
 
				+                flipped_segms.append(_flip_rle(segm, height, width))
			
 
				+        return flipped_segms
			
 
				+
			
 
				+    def apply_keypoint(self, gt_keypoint, width):
			
 
				+        for i in range(gt_keypoint.shape[1]):
			
 
				+            if i % 2 == 0:
			
 
				+                old_x = gt_keypoint[:, i].copy()
			
 
				+                gt_keypoint[:, i] = width - old_x
			
 
				+        return gt_keypoint
			
 
				+
			
 
				+    def apply_image(self, image):
			
 
				+        return image[:, ::-1, :]
			
 
				+
			
 
				+    def apply_bbox(self, bbox, width):
			
 
				+        oldx1 = bbox[:, 0].copy()
			
 
				+        oldx2 = bbox[:, 2].copy()
			
 
				+        bbox[:, 0] = width - oldx2
			
 
				+        bbox[:, 2] = width - oldx1
			
 
				+        return bbox
			
 
				+
			
 
				+    def apply_rbox(self, bbox, width):
			
 
				+        oldx1 = bbox[:, 0].copy()
			
 
				+        oldx2 = bbox[:, 2].copy()
			
 
				+        oldx3 = bbox[:, 4].copy()
			
 
				+        oldx4 = bbox[:, 6].copy()
			
 
				+        bbox[:, 0] = width - oldx1
			
 
				+        bbox[:, 2] = width - oldx2
			
 
				+        bbox[:, 4] = width - oldx3
			
 
				+        bbox[:, 6] = width - oldx4
			
 
				+        bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
			
 
				+        return bbox
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """Filp the image and bounding box.
			
 
				+        Operators:
			
 
				+            1. Flip the image numpy.
			
 
				+            2. Transform the bboxes' x coordinates.
			
 
				+              (Must judge whether the coordinates are normalized!)
			
 
				+            3. Transform the segmentations' x coordinates.
			
 
				+              (Must judge whether the coordinates are normalized!)
			
 
				+        Output:
			
 
				+            sample: the image, bounding box and segmentation part
			
 
				+                    in sample are flipped.
			
 
				+        """
			
 
				+        if np.random.uniform(0, 1) < self.prob:
			
 
				+            im = sample['image']
			
 
				+            height, width = im.shape[:2]
			
 
				+            im = self.apply_image(im)
			
 
				+            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
			
 
				+            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
			
 
				+                                                    width)
			
 
				+            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
			
 
				+                sample['gt_keypoint'] = self.apply_keypoint(
			
 
				+                    sample['gt_keypoint'], width)
			
 
				+
			
 
				+            if 'semantic' in sample and sample['semantic']:
			
 
				+                sample['semantic'] = sample['semantic'][:, ::-1]
			
 
				+
			
 
				+            if 'gt_segm' in sample and sample['gt_segm'].any():
			
 
				+                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
			
 
				+
			
 
				+            if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
			
 
				+                sample['gt_rbox2poly'] = self.apply_rbox(
			
 
				+                    sample['gt_rbox2poly'], width)
			
 
				+
			
 
				+            sample['flipped'] = True
			
 
				+            sample['image'] = im
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Resize(BaseOperator):
			
 
				+    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
			
 
				+        """
			
 
				+        Resize image to target size. if keep_ratio is True,
			
 
				+        resize the image's long side to the maximum of target_size
			
 
				+        if keep_ratio is False, resize the image to target size(h, w)
			
 
				+        Args:
			
 
				+            target_size (int|list): image target size
			
 
				+            keep_ratio (bool): whether keep_ratio or not, default true
			
 
				+            interp (int): the interpolation method
			
 
				+        """
			
 
				+        super(Resize, self).__init__()
			
 
				+        self.keep_ratio = keep_ratio
			
 
				+        self.interp = interp
			
 
				+        if not isinstance(target_size, (Integral, Sequence)):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        if isinstance(target_size, Integral):
			
 
				+            target_size = [target_size, target_size]
			
 
				+        self.target_size = target_size
			
 
				+
			
 
				+    def apply_image(self, image, scale):
			
 
				+        im_scale_x, im_scale_y = scale
			
 
				+
			
 
				+        return cv2.resize(
			
 
				+            image,
			
 
				+            None,
			
 
				+            None,
			
 
				+            fx=im_scale_x,
			
 
				+            fy=im_scale_y,
			
 
				+            interpolation=self.interp)
			
 
				+
			
 
				+    def apply_bbox(self, bbox, scale, size):
			
 
				+        im_scale_x, im_scale_y = scale
			
 
				+        resize_w, resize_h = size
			
 
				+        bbox[:, 0::2] *= im_scale_x
			
 
				+        bbox[:, 1::2] *= im_scale_y
			
 
				+        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
			
 
				+        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
			
 
				+        return bbox
			
 
				+
			
 
				+    def apply_segm(self, segms, im_size, scale):
			
 
				+        def _resize_poly(poly, im_scale_x, im_scale_y):
			
 
				+            resized_poly = np.array(poly).astype('float32')
			
 
				+            resized_poly[0::2] *= im_scale_x
			
 
				+            resized_poly[1::2] *= im_scale_y
			
 
				+            return resized_poly.tolist()
			
 
				+
			
 
				+        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
			
 
				+            if 'counts' in rle and type(rle['counts']) == list:
			
 
				+                rle = mask_util.frPyObjects(rle, im_h, im_w)
			
 
				+
			
 
				+            mask = mask_util.decode(rle)
			
 
				+            mask = cv2.resize(
			
 
				+                mask,
			
 
				+                None,
			
 
				+                None,
			
 
				+                fx=im_scale_x,
			
 
				+                fy=im_scale_y,
			
 
				+                interpolation=self.interp)
			
 
				+            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
			
 
				+            return rle
			
 
				+
			
 
				+        im_h, im_w = im_size
			
 
				+        im_scale_x, im_scale_y = scale
			
 
				+        resized_segms = []
			
 
				+        for segm in segms:
			
 
				+            if is_poly(segm):
			
 
				+                # Polygon format
			
 
				+                resized_segms.append([
			
 
				+                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
			
 
				+                ])
			
 
				+            else:
			
 
				+                # RLE format
			
 
				+                import pycocotools.mask as mask_util
			
 
				+                resized_segms.append(
			
 
				+                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
			
 
				+
			
 
				+        return resized_segms
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Resize the image numpy.
			
 
				+        """
			
 
				+        im = sample['image']
			
 
				+        if not isinstance(im, np.ndarray):
			
 
				+            raise TypeError("{}: image type is not numpy.".format(self))
			
 
				+        if len(im.shape) != 3:
			
 
				+            raise ImageError('{}: image is not 3-dimensional.'.format(self))
			
 
				+
			
 
				+        # apply image
			
 
				+        im_shape = im.shape
			
 
				+        if self.keep_ratio:
			
 
				+
			
 
				+            im_size_min = np.min(im_shape[0:2])
			
 
				+            im_size_max = np.max(im_shape[0:2])
			
 
				+
			
 
				+            target_size_min = np.min(self.target_size)
			
 
				+            target_size_max = np.max(self.target_size)
			
 
				+
			
 
				+            im_scale = min(target_size_min / im_size_min,
			
 
				+                           target_size_max / im_size_max)
			
 
				+
			
 
				+            resize_h = im_scale * float(im_shape[0])
			
 
				+            resize_w = im_scale * float(im_shape[1])
			
 
				+
			
 
				+            im_scale_x = im_scale
			
 
				+            im_scale_y = im_scale
			
 
				+        else:
			
 
				+            resize_h, resize_w = self.target_size
			
 
				+            im_scale_y = resize_h / im_shape[0]
			
 
				+            im_scale_x = resize_w / im_shape[1]
			
 
				+
			
 
				+        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
			
 
				+        sample['image'] = im
			
 
				+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
			
 
				+        if 'scale_factor' in sample:
			
 
				+            scale_factor = sample['scale_factor']
			
 
				+            sample['scale_factor'] = np.asarray(
			
 
				+                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
			
 
				+                dtype=np.float32)
			
 
				+        else:
			
 
				+            sample['scale_factor'] = np.asarray(
			
 
				+                [im_scale_y, im_scale_x], dtype=np.float32)
			
 
				+
			
 
				+        # apply bbox
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
			
 
				+                                                [im_scale_x, im_scale_y],
			
 
				+                                                [resize_w, resize_h])
			
 
				+
			
 
				+        # apply rbox
			
 
				+        if 'gt_rbox2poly' in sample:
			
 
				+            if np.array(sample['gt_rbox2poly']).shape[1] != 8:
			
 
				+                logger.warning(
			
 
				+                    "gt_rbox2poly's length shoule be 8, but actually is {}".
			
 
				+                    format(len(sample['gt_rbox2poly'])))
			
 
				+            sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
			
 
				+                                                     [im_scale_x, im_scale_y],
			
 
				+                                                     [resize_w, resize_h])
			
 
				+
			
 
				+        # apply polygon
			
 
				+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+            sample['gt_poly'] = self.apply_segm(
			
 
				+                sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])
			
 
				+
			
 
				+        # apply semantic
			
 
				+        if 'semantic' in sample and sample['semantic']:
			
 
				+            semantic = sample['semantic']
			
 
				+            semantic = cv2.resize(
			
 
				+                semantic.astype('float32'),
			
 
				+                None,
			
 
				+                None,
			
 
				+                fx=im_scale_x,
			
 
				+                fy=im_scale_y,
			
 
				+                interpolation=self.interp)
			
 
				+            semantic = np.asarray(semantic).astype('int32')
			
 
				+            semantic = np.expand_dims(semantic, 0)
			
 
				+            sample['semantic'] = semantic
			
 
				+
			
 
				+        # apply gt_segm
			
 
				+        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
			
 
				+            masks = [
			
 
				+                cv2.resize(
			
 
				+                    gt_segm,
			
 
				+                    None,
			
 
				+                    None,
			
 
				+                    fx=im_scale_x,
			
 
				+                    fy=im_scale_y,
			
 
				+                    interpolation=cv2.INTER_NEAREST)
			
 
				+                for gt_segm in sample['gt_segm']
			
 
				+            ]
			
 
				+            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class MultiscaleTestResize(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 origin_target_size=[800, 1333],
			
 
				+                 target_size=[],
			
 
				+                 interp=cv2.INTER_LINEAR,
			
 
				+                 use_flip=True):
			
 
				+        """
			
 
				+        Rescale image to the each size in target size, and capped at max_size.
			
 
				+        Args:
			
 
				+            origin_target_size (list): origin target size of image
			
 
				+            target_size (list): A list of target sizes of image.
			
 
				+            interp (int): the interpolation method.
			
 
				+            use_flip (bool): whether use flip augmentation.
			
 
				+        """
			
 
				+        super(MultiscaleTestResize, self).__init__()
			
 
				+        self.interp = interp
			
 
				+        self.use_flip = use_flip
			
 
				+
			
 
				+        if not isinstance(target_size, Sequence):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid. Must be List or Tuple, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        self.target_size = target_size
			
 
				+
			
 
				+        if not isinstance(origin_target_size, Sequence):
			
 
				+            raise TypeError(
			
 
				+                "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
			
 
				+                format(type(origin_target_size)))
			
 
				+
			
 
				+        self.origin_target_size = origin_target_size
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Resize the image numpy for multi-scale test.
			
 
				+        """
			
 
				+        samples = []
			
 
				+        resizer = Resize(
			
 
				+            self.origin_target_size, keep_ratio=True, interp=self.interp)
			
 
				+        samples.append(resizer(sample.copy(), context))
			
 
				+        if self.use_flip:
			
 
				+            flipper = RandomFlip(1.1)
			
 
				+            samples.append(flipper(sample.copy(), context=context))
			
 
				+
			
 
				+        for size in self.target_size:
			
 
				+            resizer = Resize(size, keep_ratio=True, interp=self.interp)
			
 
				+            samples.append(resizer(sample.copy(), context))
			
 
				+
			
 
				+        return samples
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomResize(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 target_size,
			
 
				+                 keep_ratio=True,
			
 
				+                 interp=cv2.INTER_LINEAR,
			
 
				+                 random_size=True,
			
 
				+                 random_interp=False):
			
 
				+        """
			
 
				+        Resize image to target size randomly. random target_size and interpolation method
			
 
				+        Args:
			
 
				+            target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
			
 
				+            keep_ratio (bool): whether keep_raio or not, default true
			
 
				+            interp (int): the interpolation method
			
 
				+            random_size (bool): whether random select target size of image
			
 
				+            random_interp (bool): whether random select interpolation method
			
 
				+        """
			
 
				+        super(RandomResize, self).__init__()
			
 
				+        self.keep_ratio = keep_ratio
			
 
				+        self.interp = interp
			
 
				+        self.interps = [
			
 
				+            cv2.INTER_NEAREST,
			
 
				+            cv2.INTER_LINEAR,
			
 
				+            cv2.INTER_AREA,
			
 
				+            cv2.INTER_CUBIC,
			
 
				+            cv2.INTER_LANCZOS4,
			
 
				+        ]
			
 
				+        assert isinstance(target_size, (
			
 
				+            Integral, Sequence)), "target_size must be Integer, List or Tuple"
			
 
				+        if random_size and not isinstance(target_size, Sequence):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
			
 
				+                format(type(target_size)))
			
 
				+        self.target_size = target_size
			
 
				+        self.random_size = random_size
			
 
				+        self.random_interp = random_interp
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        """ Resize the image numpy.
			
 
				+        """
			
 
				+        if self.random_size:
			
 
				+            target_size = random.choice(self.target_size)
			
 
				+        else:
			
 
				+            target_size = self.target_size
			
 
				+
			
 
				+        if self.random_interp:
			
 
				+            interp = random.choice(self.interps)
			
 
				+        else:
			
 
				+            interp = self.interp
			
 
				+
			
 
				+        resizer = Resize(target_size, self.keep_ratio, interp)
			
 
				+        return resizer(sample, context=context)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomExpand(BaseOperator):
			
 
				+    """Random expand the canvas.
			
 
				+    Args:
			
 
				+        ratio (float): maximum expansion ratio.
			
 
				+        prob (float): probability to expand.
			
 
				+        fill_value (list): color value used to fill the canvas. in RGB order.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
			
 
				+        super(RandomExpand, self).__init__()
			
 
				+        assert ratio > 1.01, "expand ratio must be larger than 1.01"
			
 
				+        self.ratio = ratio
			
 
				+        self.prob = prob
			
 
				+        assert isinstance(fill_value, (Number, Sequence)), \
			
 
				+            "fill value must be either float or sequence"
			
 
				+        if isinstance(fill_value, Number):
			
 
				+            fill_value = (fill_value, ) * 3
			
 
				+        if not isinstance(fill_value, tuple):
			
 
				+            fill_value = tuple(fill_value)
			
 
				+        self.fill_value = fill_value
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        if np.random.uniform(0., 1.) < self.prob:
			
 
				+            return sample
			
 
				+
			
 
				+        im = sample['image']
			
 
				+        height, width = im.shape[:2]
			
 
				+        ratio = np.random.uniform(1., self.ratio)
			
 
				+        h = int(height * ratio)
			
 
				+        w = int(width * ratio)
			
 
				+        if not h > height or not w > width:
			
 
				+            return sample
			
 
				+        y = np.random.randint(0, h - height)
			
 
				+        x = np.random.randint(0, w - width)
			
 
				+        offsets, size = [x, y], [h, w]
			
 
				+
			
 
				+        pad = Pad(size,
			
 
				+                  pad_mode=-1,
			
 
				+                  offsets=offsets,
			
 
				+                  fill_value=self.fill_value)
			
 
				+
			
 
				+        return pad(sample, context=context)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class CropWithSampling(BaseOperator):
			
 
				+    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
			
 
				+        """
			
 
				+        Args:
			
 
				+            batch_sampler (list): Multiple sets of different
			
 
				+                                  parameters for cropping.
			
 
				+            satisfy_all (bool): whether all boxes must satisfy.
			
 
				+            e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
			
 
				+                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
			
 
				+           [max sample, max trial, min scale, max scale,
			
 
				+            min aspect ratio, max aspect ratio,
			
 
				+            min overlap, max overlap]
			
 
				+            avoid_no_bbox (bool): whether to to avoid the
			
 
				+                                  situation where the box does not appear.
			
 
				+        """
			
 
				+        super(CropWithSampling, self).__init__()
			
 
				+        self.batch_sampler = batch_sampler
			
 
				+        self.satisfy_all = satisfy_all
			
 
				+        self.avoid_no_bbox = avoid_no_bbox
			
 
				+
			
 
				+    def apply(self, sample, context):
			
 
				+        """
			
 
				+        Crop the image and modify bounding box.
			
 
				+        Operators:
			
 
				+            1. Scale the image width and height.
			
 
				+            2. Crop the image according to a radom sample.
			
 
				+            3. Rescale the bounding box.
			
 
				+            4. Determine if the new bbox is satisfied in the new image.
			
 
				+        Returns:
			
 
				+            sample: the image, bounding box are replaced.
			
 
				+        """
			
 
				+        assert 'image' in sample, "image data not found"
			
 
				+        im = sample['image']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        gt_class = sample['gt_class']
			
 
				+        im_height, im_width = im.shape[:2]
			
 
				+        gt_score = None
			
 
				+        if 'gt_score' in sample:
			
 
				+            gt_score = sample['gt_score']
			
 
				+        sampled_bbox = []
			
 
				+        gt_bbox = gt_bbox.tolist()
			
 
				+        for sampler in self.batch_sampler:
			
 
				+            found = 0
			
 
				+            for i in range(sampler[1]):
			
 
				+                if found >= sampler[0]:
			
 
				+                    break
			
 
				+                sample_bbox = generate_sample_bbox(sampler)
			
 
				+                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
			
 
				+                                             self.satisfy_all):
			
 
				+                    sampled_bbox.append(sample_bbox)
			
 
				+                    found = found + 1
			
 
				+        im = np.array(im)
			
 
				+        while sampled_bbox:
			
 
				+            idx = int(np.random.uniform(0, len(sampled_bbox)))
			
 
				+            sample_bbox = sampled_bbox.pop(idx)
			
 
				+            sample_bbox = clip_bbox(sample_bbox)
			
 
				+            crop_bbox, crop_class, crop_score = \
			
 
				+                filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
			
 
				+            if self.avoid_no_bbox:
			
 
				+                if len(crop_bbox) < 1:
			
 
				+                    continue
			
 
				+            xmin = int(sample_bbox[0] * im_width)
			
 
				+            xmax = int(sample_bbox[2] * im_width)
			
 
				+            ymin = int(sample_bbox[1] * im_height)
			
 
				+            ymax = int(sample_bbox[3] * im_height)
			
 
				+            im = im[ymin:ymax, xmin:xmax]
			
 
				+            sample['image'] = im
			
 
				+            sample['gt_bbox'] = crop_bbox
			
 
				+            sample['gt_class'] = crop_class
			
 
				+            sample['gt_score'] = crop_score
			
 
				+            return sample
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class CropWithDataAchorSampling(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 batch_sampler,
			
 
				+                 anchor_sampler=None,
			
 
				+                 target_size=None,
			
 
				+                 das_anchor_scales=[16, 32, 64, 128],
			
 
				+                 sampling_prob=0.5,
			
 
				+                 min_size=8.,
			
 
				+                 avoid_no_bbox=True):
			
 
				+        """
			
 
				+        Args:
			
 
				+            anchor_sampler (list): anchor_sampling sets of different
			
 
				+                                  parameters for cropping.
			
 
				+            batch_sampler (list): Multiple sets of different
			
 
				+                                  parameters for cropping.
			
 
				+              e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
			
 
				+                  [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
			
 
				+                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
			
 
				+                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
			
 
				+                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
			
 
				+                   [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
			
 
				+              [max sample, max trial, min scale, max scale,
			
 
				+               min aspect ratio, max aspect ratio,
			
 
				+               min overlap, max overlap, min coverage, max coverage]
			
 
				+            target_size (int): target image size.
			
 
				+            das_anchor_scales (list[float]): a list of anchor scales in data
			
 
				+                anchor smapling.
			
 
				+            min_size (float): minimum size of sampled bbox.
			
 
				+            avoid_no_bbox (bool): whether to to avoid the
			
 
				+                                  situation where the box does not appear.
			
 
				+        """
			
 
				+        super(CropWithDataAchorSampling, self).__init__()
			
 
				+        self.anchor_sampler = anchor_sampler
			
 
				+        self.batch_sampler = batch_sampler
			
 
				+        self.target_size = target_size
			
 
				+        self.sampling_prob = sampling_prob
			
 
				+        self.min_size = min_size
			
 
				+        self.avoid_no_bbox = avoid_no_bbox
			
 
				+        self.das_anchor_scales = np.array(das_anchor_scales)
			
 
				+
			
 
				+    def apply(self, sample, context):
			
 
				+        """
			
 
				+        Crop the image and modify bounding box.
			
 
				+        Operators:
			
 
				+            1. Scale the image width and height.
			
 
				+            2. Crop the image according to a radom sample.
			
 
				+            3. Rescale the bounding box.
			
 
				+            4. Determine if the new bbox is satisfied in the new image.
			
 
				+        Returns:
			
 
				+            sample: the image, bounding box are replaced.
			
 
				+        """
			
 
				+        assert 'image' in sample, "image data not found"
			
 
				+        im = sample['image']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        gt_class = sample['gt_class']
			
 
				+        image_height, image_width = im.shape[:2]
			
 
				+        gt_bbox[:, 0] /= image_width
			
 
				+        gt_bbox[:, 1] /= image_height
			
 
				+        gt_bbox[:, 2] /= image_width
			
 
				+        gt_bbox[:, 3] /= image_height
			
 
				+        gt_score = None
			
 
				+        if 'gt_score' in sample:
			
 
				+            gt_score = sample['gt_score']
			
 
				+        sampled_bbox = []
			
 
				+        gt_bbox = gt_bbox.tolist()
			
 
				+
			
 
				+        prob = np.random.uniform(0., 1.)
			
 
				+        if prob > self.sampling_prob:  # anchor sampling
			
 
				+            assert self.anchor_sampler
			
 
				+            for sampler in self.anchor_sampler:
			
 
				+                found = 0
			
 
				+                for i in range(sampler[1]):
			
 
				+                    if found >= sampler[0]:
			
 
				+                        break
			
 
				+                    sample_bbox = data_anchor_sampling(
			
 
				+                        gt_bbox, image_width, image_height,
			
 
				+                        self.das_anchor_scales, self.target_size)
			
 
				+                    if sample_bbox == 0:
			
 
				+                        break
			
 
				+                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
			
 
				+                                                          gt_bbox):
			
 
				+                        sampled_bbox.append(sample_bbox)
			
 
				+                        found = found + 1
			
 
				+            im = np.array(im)
			
 
				+            while sampled_bbox:
			
 
				+                idx = int(np.random.uniform(0, len(sampled_bbox)))
			
 
				+                sample_bbox = sampled_bbox.pop(idx)
			
 
				+
			
 
				+                if 'gt_keypoint' in sample.keys():
			
 
				+                    keypoints = (sample['gt_keypoint'],
			
 
				+                                 sample['keypoint_ignore'])
			
 
				+                    crop_bbox, crop_class, crop_score, gt_keypoints = \
			
 
				+                        filter_and_process(sample_bbox, gt_bbox, gt_class,
			
 
				+                                scores=gt_score,
			
 
				+                                keypoints=keypoints)
			
 
				+                else:
			
 
				+                    crop_bbox, crop_class, crop_score = filter_and_process(
			
 
				+                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
			
 
				+                crop_bbox, crop_class, crop_score = bbox_area_sampling(
			
 
				+                    crop_bbox, crop_class, crop_score, self.target_size,
			
 
				+                    self.min_size)
			
 
				+
			
 
				+                if self.avoid_no_bbox:
			
 
				+                    if len(crop_bbox) < 1:
			
 
				+                        continue
			
 
				+                im = crop_image_sampling(im, sample_bbox, image_width,
			
 
				+                                         image_height, self.target_size)
			
 
				+                height, width = im.shape[:2]
			
 
				+                crop_bbox[:, 0] *= width
			
 
				+                crop_bbox[:, 1] *= height
			
 
				+                crop_bbox[:, 2] *= width
			
 
				+                crop_bbox[:, 3] *= height
			
 
				+                sample['image'] = im
			
 
				+                sample['gt_bbox'] = crop_bbox
			
 
				+                sample['gt_class'] = crop_class
			
 
				+                if 'gt_score' in sample:
			
 
				+                    sample['gt_score'] = crop_score
			
 
				+                if 'gt_keypoint' in sample.keys():
			
 
				+                    sample['gt_keypoint'] = gt_keypoints[0]
			
 
				+                    sample['keypoint_ignore'] = gt_keypoints[1]
			
 
				+                return sample
			
 
				+            return sample
			
 
				+
			
 
				+        else:
			
 
				+            for sampler in self.batch_sampler:
			
 
				+                found = 0
			
 
				+                for i in range(sampler[1]):
			
 
				+                    if found >= sampler[0]:
			
 
				+                        break
			
 
				+                    sample_bbox = generate_sample_bbox_square(
			
 
				+                        sampler, image_width, image_height)
			
 
				+                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
			
 
				+                                                          gt_bbox):
			
 
				+                        sampled_bbox.append(sample_bbox)
			
 
				+                        found = found + 1
			
 
				+            im = np.array(im)
			
 
				+            while sampled_bbox:
			
 
				+                idx = int(np.random.uniform(0, len(sampled_bbox)))
			
 
				+                sample_bbox = sampled_bbox.pop(idx)
			
 
				+                sample_bbox = clip_bbox(sample_bbox)
			
 
				+
			
 
				+                if 'gt_keypoint' in sample.keys():
			
 
				+                    keypoints = (sample['gt_keypoint'],
			
 
				+                                 sample['keypoint_ignore'])
			
 
				+                    crop_bbox, crop_class, crop_score, gt_keypoints = \
			
 
				+                        filter_and_process(sample_bbox, gt_bbox, gt_class,
			
 
				+                                scores=gt_score,
			
 
				+                                keypoints=keypoints)
			
 
				+                else:
			
 
				+                    crop_bbox, crop_class, crop_score = filter_and_process(
			
 
				+                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
			
 
				+                # sampling bbox according the bbox area
			
 
				+                crop_bbox, crop_class, crop_score = bbox_area_sampling(
			
 
				+                    crop_bbox, crop_class, crop_score, self.target_size,
			
 
				+                    self.min_size)
			
 
				+
			
 
				+                if self.avoid_no_bbox:
			
 
				+                    if len(crop_bbox) < 1:
			
 
				+                        continue
			
 
				+                xmin = int(sample_bbox[0] * image_width)
			
 
				+                xmax = int(sample_bbox[2] * image_width)
			
 
				+                ymin = int(sample_bbox[1] * image_height)
			
 
				+                ymax = int(sample_bbox[3] * image_height)
			
 
				+                im = im[ymin:ymax, xmin:xmax]
			
 
				+                height, width = im.shape[:2]
			
 
				+                crop_bbox[:, 0] *= width
			
 
				+                crop_bbox[:, 1] *= height
			
 
				+                crop_bbox[:, 2] *= width
			
 
				+                crop_bbox[:, 3] *= height
			
 
				+                sample['image'] = im
			
 
				+                sample['gt_bbox'] = crop_bbox
			
 
				+                sample['gt_class'] = crop_class
			
 
				+                if 'gt_score' in sample:
			
 
				+                    sample['gt_score'] = crop_score
			
 
				+                if 'gt_keypoint' in sample.keys():
			
 
				+                    sample['gt_keypoint'] = gt_keypoints[0]
			
 
				+                    sample['keypoint_ignore'] = gt_keypoints[1]
			
 
				+                return sample
			
 
				+            return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomCrop(BaseOperator):
			
 
				+    """Random crop image and bboxes.
			
 
				+    Args:
			
 
				+        aspect_ratio (list): aspect ratio of cropped region.
			
 
				+            in [min, max] format.
			
 
				+        thresholds (list): iou thresholds for decide a valid bbox crop.
			
 
				+        scaling (list): ratio between a cropped region and the original image.
			
 
				+             in [min, max] format.
			
 
				+        num_attempts (int): number of tries before giving up.
			
 
				+        allow_no_crop (bool): allow return without actually cropping them.
			
 
				+        cover_all_box (bool): ensure all bboxes are covered in the final crop.
			
 
				+        is_mask_crop(bool): whether crop the segmentation.
			
 
				+    """
			
 
				+
			
 
    def __init__(self,
                 aspect_ratio=[.5, 2.],
                 thresholds=[.0, .1, .3, .5, .7, .9],
                 scaling=[.3, 1.],
                 num_attempts=50,
                 allow_no_crop=True,
                 cover_all_box=False,
                 is_mask_crop=False):
        """Store the crop configuration; no validation is performed here.

        Args:
            aspect_ratio (list): [min, max] aspect ratio of the cropped
                region, or None.
            thresholds (list): candidate IoU thresholds for a valid crop.
            scaling (list): [min, max] ratio between the cropped region and
                the original image.
            num_attempts (int): number of tries per threshold.
            allow_no_crop (bool): allow returning the sample uncropped.
            cover_all_box (bool): require every box to meet the threshold.
            is_mask_crop (bool): whether to crop the segmentation polygons.
        """
        super(RandomCrop, self).__init__()
        # Geometry of the candidate crop windows.
        self.scaling = scaling
        self.aspect_ratio = aspect_ratio
        # Acceptance criteria and search budget.
        self.thresholds = thresholds
        self.num_attempts = num_attempts
        self.cover_all_box = cover_all_box
        # Behaviour switches.
        self.allow_no_crop = allow_no_crop
        self.is_mask_crop = is_mask_crop
			
 
				+
			
 
    def crop_segms(self, segms, valid_ids, crop, height, width):
        """Crop the segmentations selected by ``valid_ids`` to the window ``crop``.

        Args:
            segms (list): per-object segmentations, either polygon lists or
                RLE dicts (decided per object by ``is_poly``).
            valid_ids (iterable): indices into ``segms`` to process.
            crop (sequence): crop window as (xmin, ymin, xmax, ymax).
            height (int): image height, used when converting uncompressed RLE.
            width (int): image width, used when converting uncompressed RLE.

        Returns:
            list: one entry per id in ``valid_ids``: a list of flat polygon
            coordinate lists shifted to the crop origin (possibly empty), or
            an RLE produced by ``pycocotools``.
        """

        def _crop_poly(segm, crop):
            xmin, ymin, xmax, ymax = crop
            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
            crop_p = Polygon(np.array(crop_coord).reshape(4, 2))

            crop_segm = list()
            for poly in segm:
                poly = np.array(poly).reshape(len(poly) // 2, 2)
                polygon = Polygon(poly)
                if not polygon.is_valid:
                    # Split the invalid outline into the polygons formed by
                    # its own exterior ring.
                    exterior = polygon.exterior
                    multi_lines = exterior.intersection(exterior)
                    polygons = shapely.ops.polygonize(multi_lines)
                    polygon = MultiPolygon(list(polygons))

                # Multi-part geometries must be walked through `.geoms`;
                # iterating them directly was removed in Shapely 2.0.
                if isinstance(polygon, MultiPolygon):
                    candidates = list(polygon.geoms)
                else:
                    candidates = [polygon]

                for per_polygon in candidates:
                    inter = per_polygon.intersection(crop_p)
                    if inter.is_empty:
                        continue
                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
                        # Only polygonal pieces are kept; stray points and
                        # lines are dropped.
                        pieces = [
                            part for part in inter.geoms
                            if isinstance(part, Polygon)
                        ]
                    elif isinstance(inter, Polygon):
                        pieces = [inter]
                    else:
                        continue
                    for piece in pieces:
                        # Drop the closing vertex and flatten to
                        # [x0, y0, x1, y1, ...].
                        coords = np.squeeze(
                            np.array(piece.exterior.coords[:-1]).reshape(1,
                                                                         -1))
                        coords[0::2] -= xmin
                        coords[1::2] -= ymin
                        crop_segm.append(coords.tolist())
            return crop_segm

        def _crop_rle(rle, crop, height, width):
            if 'counts' in rle and isinstance(rle['counts'], list):
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        crop_segms = []
        for seg_id in valid_ids:
            segm = segms[seg_id]
            if is_poly(segm):
                # Imported lazily so shapely is only required when polygon
                # segmentations are actually present.
                import shapely.ops
                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
                logging.getLogger("shapely").setLevel(logging.WARNING)
                # Polygon format
                crop_segms.append(_crop_poly(segm, crop))
            else:
                # RLE format
                import pycocotools.mask as mask_util
                crop_segms.append(_crop_rle(segm, crop, height, width))
        return crop_segms
			
 
				+
			
 
    def apply(self, sample, context=None):
        """Randomly crop the image and keep the boxes whose centers fall inside.

        Args:
            sample (dict): reads 'image' and 'gt_bbox'; on success also
                updates 'gt_class' and, when present, 'gt_poly' (only if
                ``is_mask_crop``), 'gt_segm', 'gt_score' and 'is_crowd'.
            context: not used by this method.

        Returns:
            dict: the cropped sample. The input is returned unchanged when
            'gt_bbox' is present but empty, when the 'no_crop' entry is
            reached before any threshold produced a crop, when every cropped
            polygon turns out empty, or when no attempt succeeds.
        """
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        h, w = sample['image'].shape[:2]
        gt_bbox = sample['gt_bbox']

        # NOTE Original method attempts to generate one candidate for each
        # threshold then randomly sample one from the resulting list.
        # Here a short circuit approach is taken, i.e., randomly choose a
        # threshold and attempt to find a valid crop, and simply return the
        # first one found.
        # The probability is not exactly the same, kinda resembling the
        # "Monty Hall" problem. Actually carrying out the attempts will affect
        # observability (just like opening doors in the "Monty Hall" game).
        thresholds = list(self.thresholds)
        if self.allow_no_crop:
            thresholds.append('no_crop')
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            if thresh == 'no_crop':
                return sample

            found = False
            for i in range(self.num_attempts):
                scale = np.random.uniform(*self.scaling)
                if self.aspect_ratio is not None:
                    min_ar, max_ar = self.aspect_ratio
                    # Clamp the ratio to [scale**2, scale**-2] so that
                    # neither h_scale nor w_scale exceeds 1.
                    aspect_ratio = np.random.uniform(
                        max(min_ar, scale**2), min(max_ar, scale**-2))
                    h_scale = scale / np.sqrt(aspect_ratio)
                    w_scale = scale * np.sqrt(aspect_ratio)
                else:
                    h_scale = np.random.uniform(*self.scaling)
                    w_scale = np.random.uniform(*self.scaling)
                crop_h = h * h_scale
                crop_w = w * w_scale
                if self.aspect_ratio is None:
                    # Without an explicit range, reject crops whose h/w
                    # ratio is outside [0.5, 2.0].
                    if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
                        continue

                crop_h = int(crop_h)
                crop_w = int(crop_w)
                # NOTE(review): randint raises ValueError when crop_h == h or
                # crop_w == w (empty range) — confirm this cannot occur for
                # the configured scaling.
                crop_y = np.random.randint(0, h - crop_h)
                crop_x = np.random.randint(0, w - crop_w)
                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(
                    gt_bbox, np.array(
                        [crop_box], dtype=np.float32))
                # At least one box must reach the threshold ...
                if iou.max() < thresh:
                    continue

                # ... and, with cover_all_box, every box must.
                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(
                        crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break

            if found:
                if self.is_mask_crop and 'gt_poly' in sample and len(sample[
                        'gt_poly']) > 0:
                    crop_polys = self.crop_segms(
                        sample['gt_poly'],
                        valid_ids,
                        np.array(
                            crop_box, dtype=np.int64),
                        h,
                        w)
                    if [] in crop_polys:
                        # Drop objects whose polygons vanished in the crop and
                        # keep valid_ids aligned with the surviving polygons.
                        delete_id = list()
                        valid_polys = list()
                        for id, crop_poly in enumerate(crop_polys):
                            if crop_poly == []:
                                delete_id.append(id)
                            else:
                                valid_polys.append(crop_poly)
                        valid_ids = np.delete(valid_ids, delete_id)
                        if len(valid_polys) == 0:
                            return sample
                        sample['gt_poly'] = valid_polys
                    else:
                        sample['gt_poly'] = crop_polys

                if 'gt_segm' in sample:
                    sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
                                                        crop_box)
                    sample['gt_segm'] = np.take(
                        sample['gt_segm'], valid_ids, axis=0)

                sample['image'] = self._crop_image(sample['image'], crop_box)
                # Keep only the per-object entries selected by valid_ids.
                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
                sample['gt_class'] = np.take(
                    sample['gt_class'], valid_ids, axis=0)
                if 'gt_score' in sample:
                    sample['gt_score'] = np.take(
                        sample['gt_score'], valid_ids, axis=0)

                if 'is_crowd' in sample:
                    sample['is_crowd'] = np.take(
                        sample['is_crowd'], valid_ids, axis=0)
                return sample

        return sample
			
 
				+
			
 
				+    def _iou_matrix(self, a, b):
			
 
				+        tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
			
 
				+        br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
			
 
				+
			
 
				+        area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
			
 
				+        area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
			
 
				+        area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
			
 
				+        area_o = (area_a[:, np.newaxis] + area_b - area_i)
			
 
				+        return area_i / (area_o + 1e-10)
			
 
				+
			
 
				+    def _crop_box_with_center_constraint(self, box, crop):
			
 
				+        cropped_box = box.copy()
			
 
				+
			
 
				+        cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
			
 
				+        cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
			
 
				+        cropped_box[:, :2] -= crop[:2]
			
 
				+        cropped_box[:, 2:] -= crop[:2]
			
 
				+
			
 
				+        centers = (box[:, :2] + box[:, 2:]) / 2
			
 
				+        valid = np.logical_and(crop[:2] <= centers,
			
 
				+                               centers < crop[2:]).all(axis=1)
			
 
				+        valid = np.logical_and(
			
 
				+            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
			
 
				+
			
 
				+        return cropped_box, np.where(valid)[0]
			
 
				+
			
 
				+    def _crop_image(self, img, crop):
			
 
				+        x1, y1, x2, y2 = crop
			
 
				+        return img[y1:y2, x1:x2, :]
			
 
				+
			
 
				+    def _crop_segm(self, segm, crop):
			
 
				+        x1, y1, x2, y2 = crop
			
 
				+        return segm[:, y1:y2, x1:x2]
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomScaledCrop(BaseOperator):
			
 
				+    """Resize image and bbox based on long side (with optional random scaling),
			
 
				+       then crop or pad image to target size.
			
 
				+    Args:
			
 
				+        target_dim (int): target size.
			
 
				+        scale_range (list): random scale range.
			
 
				+        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 target_dim=512,
			
 
				+                 scale_range=[.1, 2.],
			
 
				+                 interp=cv2.INTER_LINEAR):
			
 
				+        super(RandomScaledCrop, self).__init__()
			
 
				+        self.target_dim = target_dim
			
 
				+        self.scale_range = scale_range
			
 
				+        self.interp = interp
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        img = sample['image']
			
 
				+        h, w = img.shape[:2]
			
 
				+        random_scale = np.random.uniform(*self.scale_range)
			
 
				+        dim = self.target_dim
			
 
				+        random_dim = int(dim * random_scale)
			
 
				+        dim_max = max(h, w)
			
 
				+        scale = random_dim / dim_max
			
 
				+        resize_w = w * scale
			
 
				+        resize_h = h * scale
			
 
				+        offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
			
 
				+        offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
			
 
				+
			
 
				+        img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
			
 
				+        img = np.array(img)
			
 
				+        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
			
 
				+        canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
			
 
				+            offset_y:offset_y + dim, offset_x:offset_x + dim, :]
			
 
				+        sample['image'] = canvas
			
 
				+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
			
 
				+        scale_factor = sample['sacle_factor']
			
 
				+        sample['scale_factor'] = np.asarray(
			
 
				+            [scale_factor[0] * scale, scale_factor[1] * scale],
			
 
				+            dtype=np.float32)
			
 
				+
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
			
 
				+            shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
			
 
				+            boxes = sample['gt_bbox'] * scale_array - shift_array
			
 
				+            boxes = np.clip(boxes, 0, dim - 1)
			
 
				+            # filter boxes with no area
			
 
				+            area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
			
 
				+            valid = (area > 1.).nonzero()[0]
			
 
				+            sample['gt_bbox'] = boxes[valid]
			
 
				+            sample['gt_class'] = sample['gt_class'][valid]
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Cutmix(BaseOperator):
			
 
				+    def __init__(self, alpha=1.5, beta=1.5):
			
 
				+        """
			
 
				+        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
			
 
				+        Cutmix image and gt_bbbox/gt_score
			
 
				+        Args:
			
 
				+             alpha (float): alpha parameter of beta distribute
			
 
				+             beta (float): beta parameter of beta distribute
			
 
				+        """
			
 
				+        super(Cutmix, self).__init__()
			
 
				+        self.alpha = alpha
			
 
				+        self.beta = beta
			
 
				+        if self.alpha <= 0.0:
			
 
				+            raise ValueError("alpha shold be positive in {}".format(self))
			
 
				+        if self.beta <= 0.0:
			
 
				+            raise ValueError("beta shold be positive in {}".format(self))
			
 
				+
			
 
				+    def apply_image(self, img1, img2, factor):
			
 
				+        """ _rand_bbox """
			
 
				+        h = max(img1.shape[0], img2.shape[0])
			
 
				+        w = max(img1.shape[1], img2.shape[1])
			
 
				+        cut_rat = np.sqrt(1. - factor)
			
 
				+
			
 
				+        cut_w = np.int32(w * cut_rat)
			
 
				+        cut_h = np.int32(h * cut_rat)
			
 
				+
			
 
				+        # uniform
			
 
				+        cx = np.random.randint(w)
			
 
				+        cy = np.random.randint(h)
			
 
				+
			
 
				+        bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
			
 
				+        bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
			
 
				+        bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
			
 
				+        bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
			
 
				+
			
 
				+        img_1_pad = np.zeros((h, w, img1.shape[2]), 'float32')
			
 
				+        img_1_pad[:img1.shape[0], :img1.shape[1], :] = \
			
 
				+            img1.astype('float32')
			
 
				+        img_2_pad = np.zeros((h, w, img2.shape[2]), 'float32')
			
 
				+        img_2_pad[:img2.shape[0], :img2.shape[1], :] = \
			
 
				+            img2.astype('float32')
			
 
				+        img_1_pad[bby1:bby2, bbx1:bbx2, :] = img_2_pad[bby1:bby2, bbx1:bbx2, :]
			
 
				+        return img_1_pad
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        if not isinstance(sample, Sequence):
			
 
				+            return sample
			
 
				+
			
 
				+        assert len(sample) == 2, 'cutmix need two samples'
			
 
				+
			
 
				+        factor = np.random.beta(self.alpha, self.beta)
			
 
				+        factor = max(0.0, min(1.0, factor))
			
 
				+        if factor >= 1.0:
			
 
				+            return sample[0]
			
 
				+        if factor <= 0.0:
			
 
				+            return sample[1]
			
 
				+        img1 = sample[0]['image']
			
 
				+        img2 = sample[1]['image']
			
 
				+        img = self.apply_image(img1, img2, factor)
			
 
				+        gt_bbox1 = sample[0]['gt_bbox']
			
 
				+        gt_bbox2 = sample[1]['gt_bbox']
			
 
				+        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
			
 
				+        gt_class1 = sample[0]['gt_class']
			
 
				+        gt_class2 = sample[1]['gt_class']
			
 
				+        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
			
 
				+        gt_score1 = np.ones_like(sample[0]['gt_class'])
			
 
				+        gt_score2 = np.ones_like(sample[1]['gt_class'])
			
 
				+        gt_score = np.concatenate(
			
 
				+            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
			
 
				+        result = copy.deepcopy(sample[0])
			
 
				+        result['image'] = img
			
 
				+        result['gt_bbox'] = gt_bbox
			
 
				+        result['gt_score'] = gt_score
			
 
				+        result['gt_class'] = gt_class
			
 
				+        if 'is_crowd' in sample[0]:
			
 
				+            is_crowd1 = sample[0]['is_crowd']
			
 
				+            is_crowd2 = sample[1]['is_crowd']
			
 
				+            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
			
 
				+            result['is_crowd'] = is_crowd
			
 
				+        if 'difficult' in sample[0]:
			
 
				+            is_difficult1 = sample[0]['difficult']
			
 
				+            is_difficult2 = sample[1]['difficult']
			
 
				+            is_difficult = np.concatenate(
			
 
				+                (is_difficult1, is_difficult2), axis=0)
			
 
				+            result['difficult'] = is_difficult
			
 
				+        return result
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Mixup(BaseOperator):
			
 
				+    def __init__(self, alpha=1.5, beta=1.5):
			
 
				+        """ Mixup image and gt_bbbox/gt_score
			
 
				+        Args:
			
 
				+            alpha (float): alpha parameter of beta distribute
			
 
				+            beta (float): beta parameter of beta distribute
			
 
				+        """
			
 
				+        super(Mixup, self).__init__()
			
 
				+        self.alpha = alpha
			
 
				+        self.beta = beta
			
 
				+        if self.alpha <= 0.0:
			
 
				+            raise ValueError("alpha shold be positive in {}".format(self))
			
 
				+        if self.beta <= 0.0:
			
 
				+            raise ValueError("beta shold be positive in {}".format(self))
			
 
				+
			
 
				+    def apply_image(self, img1, img2, factor):
			
 
				+        h = max(img1.shape[0], img2.shape[0])
			
 
				+        w = max(img1.shape[1], img2.shape[1])
			
 
				+        img = np.zeros((h, w, img1.shape[2]), 'float32')
			
 
				+        img[:img1.shape[0], :img1.shape[1], :] = \
			
 
				+            img1.astype('float32') * factor
			
 
				+        img[:img2.shape[0], :img2.shape[1], :] += \
			
 
				+            img2.astype('float32') * (1.0 - factor)
			
 
				+        return img.astype('uint8')
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        if not isinstance(sample, Sequence):
			
 
				+            return sample
			
 
				+
			
 
				+        assert len(sample) == 2, 'mixup need two samples'
			
 
				+
			
 
				+        factor = np.random.beta(self.alpha, self.beta)
			
 
				+        factor = max(0.0, min(1.0, factor))
			
 
				+        if factor >= 1.0:
			
 
				+            return sample[0]
			
 
				+        if factor <= 0.0:
			
 
				+            return sample[1]
			
 
				+        im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
			
 
				+        result = copy.deepcopy(sample[0])
			
 
				+        result['image'] = im
			
 
				+        # apply bbox and score
			
 
				+        if 'gt_bbox' in sample[0]:
			
 
				+            gt_bbox1 = sample[0]['gt_bbox']
			
 
				+            gt_bbox2 = sample[1]['gt_bbox']
			
 
				+            gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
			
 
				+            result['gt_bbox'] = gt_bbox
			
 
				+        if 'gt_class' in sample[0]:
			
 
				+            gt_class1 = sample[0]['gt_class']
			
 
				+            gt_class2 = sample[1]['gt_class']
			
 
				+            gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
			
 
				+            result['gt_class'] = gt_class
			
 
				+
			
 
				+            gt_score1 = np.ones_like(sample[0]['gt_class'])
			
 
				+            gt_score2 = np.ones_like(sample[1]['gt_class'])
			
 
				+            gt_score = np.concatenate(
			
 
				+                (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
			
 
				+            result['gt_score'] = gt_score
			
 
				+        if 'is_crowd' in sample[0]:
			
 
				+            is_crowd1 = sample[0]['is_crowd']
			
 
				+            is_crowd2 = sample[1]['is_crowd']
			
 
				+            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
			
 
				+            result['is_crowd'] = is_crowd
			
 
				+        if 'difficult' in sample[0]:
			
 
				+            is_difficult1 = sample[0]['difficult']
			
 
				+            is_difficult2 = sample[1]['difficult']
			
 
				+            is_difficult = np.concatenate(
			
 
				+                (is_difficult1, is_difficult2), axis=0)
			
 
				+            result['difficult'] = is_difficult
			
 
				+
			
 
				+        if 'gt_ide' in sample[0]:
			
 
				+            gt_ide1 = sample[0]['gt_ide']
			
 
				+            gt_ide2 = sample[1]['gt_ide']
			
 
				+            gt_ide = np.concatenate((gt_ide1, gt_ide2), axis=0)
			
 
				+            result['gt_ide'] = gt_ide
			
 
				+        return result
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class NormalizeBox(BaseOperator):
			
 
				+    """Transform the bounding box's coornidates to [0,1]."""
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(NormalizeBox, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context):
			
 
				+        im = sample['image']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        height, width, _ = im.shape
			
 
				+        for i in range(gt_bbox.shape[0]):
			
 
				+            gt_bbox[i][0] = gt_bbox[i][0] / width
			
 
				+            gt_bbox[i][1] = gt_bbox[i][1] / height
			
 
				+            gt_bbox[i][2] = gt_bbox[i][2] / width
			
 
				+            gt_bbox[i][3] = gt_bbox[i][3] / height
			
 
				+        sample['gt_bbox'] = gt_bbox
			
 
				+
			
 
				+        if 'gt_keypoint' in sample.keys():
			
 
				+            gt_keypoint = sample['gt_keypoint']
			
 
				+
			
 
				+            for i in range(gt_keypoint.shape[1]):
			
 
				+                if i % 2:
			
 
				+                    gt_keypoint[:, i] = gt_keypoint[:, i] / height
			
 
				+                else:
			
 
				+                    gt_keypoint[:, i] = gt_keypoint[:, i] / width
			
 
				+            sample['gt_keypoint'] = gt_keypoint
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class BboxXYXY2XYWH(BaseOperator):
			
 
				+    """
			
 
				+    Convert bbox XYXY format to XYWH format.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(BboxXYXY2XYWH, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        assert 'gt_bbox' in sample
			
 
				+        bbox = sample['gt_bbox']
			
 
				+        bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
			
 
				+        bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
			
 
				+        sample['gt_bbox'] = bbox
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class PadBox(BaseOperator):
			
 
				+    def __init__(self, num_max_boxes=50):
			
 
				+        """
			
 
				+        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
			
 
				+        Args:
			
 
				+            num_max_boxes (int): the max number of bboxes
			
 
				+        """
			
 
				+        self.num_max_boxes = num_max_boxes
			
 
				+        super(PadBox, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        assert 'gt_bbox' in sample
			
 
				+        bbox = sample['gt_bbox']
			
 
				+        gt_num = min(self.num_max_boxes, len(bbox))
			
 
				+        num_max = self.num_max_boxes
			
 
				+        # fields = context['fields'] if context else []
			
 
				+        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
			
 
				+        if gt_num > 0:
			
 
				+            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
			
 
				+        sample['gt_bbox'] = pad_bbox
			
 
				+        if 'gt_class' in sample:
			
 
				+            pad_class = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
			
 
				+            sample['gt_class'] = pad_class
			
 
				+        if 'gt_score' in sample:
			
 
				+            pad_score = np.zeros((num_max, ), dtype=np.float32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
			
 
				+            sample['gt_score'] = pad_score
			
 
				+        # in training, for example in op ExpandImage,
			
 
				+        # the bbox and gt_class is expandded, but the difficult is not,
			
 
				+        # so, judging by it's length
			
 
				+        if 'difficult' in sample:
			
 
				+            pad_diff = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
			
 
				+            sample['difficult'] = pad_diff
			
 
				+        if 'is_crowd' in sample:
			
 
				+            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
			
 
				+            sample['is_crowd'] = pad_crowd
			
 
				+        if 'gt_ide' in sample:
			
 
				+            pad_ide = np.zeros((num_max, ), dtype=np.int32)
			
 
				+            if gt_num > 0:
			
 
				+                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
			
 
				+            sample['gt_ide'] = pad_ide
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class DebugVisibleImage(BaseOperator):
			
 
				+    """
			
 
				+    In debug mode, visualize images according to `gt_box`.
			
 
				+    (Currently only supported when not cropping and flipping image.)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, output_dir='output/debug', is_normalized=False):
			
 
				+        super(DebugVisibleImage, self).__init__()
			
 
				+        self.is_normalized = is_normalized
			
 
				+        self.output_dir = output_dir
			
 
				+        if not os.path.isdir(output_dir):
			
 
				+            os.makedirs(output_dir)
			
 
				+        if not isinstance(self.is_normalized, bool):
			
 
				+            raise TypeError("{}: input type is invalid.".format(self))
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        image = Image.fromarray(sample['image'].astype(np.uint8))
			
 
				+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
			
 
				+        width = sample['w']
			
 
				+        height = sample['h']
			
 
				+        gt_bbox = sample['gt_bbox']
			
 
				+        gt_class = sample['gt_class']
			
 
				+        draw = ImageDraw.Draw(image)
			
 
				+        for i in range(gt_bbox.shape[0]):
			
 
				+            if self.is_normalized:
			
 
				+                gt_bbox[i][0] = gt_bbox[i][0] * width
			
 
				+                gt_bbox[i][1] = gt_bbox[i][1] * height
			
 
				+                gt_bbox[i][2] = gt_bbox[i][2] * width
			
 
				+                gt_bbox[i][3] = gt_bbox[i][3] * height
			
 
				+
			
 
				+            xmin, ymin, xmax, ymax = gt_bbox[i]
			
 
				+            draw.line(
			
 
				+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
			
 
				+                 (xmin, ymin)],
			
 
				+                width=2,
			
 
				+                fill='green')
			
 
				+            # draw label
			
 
				+            text = str(gt_class[i][0])
			
 
				+            tw, th = draw.textsize(text)
			
 
				+            draw.rectangle(
			
 
				+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
			
 
				+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
			
 
				+
			
 
				+        if 'gt_keypoint' in sample.keys():
			
 
				+            gt_keypoint = sample['gt_keypoint']
			
 
				+            if self.is_normalized:
			
 
				+                for i in range(gt_keypoint.shape[1]):
			
 
				+                    if i % 2:
			
 
				+                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
			
 
				+                    else:
			
 
				+                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
			
 
				+            for i in range(gt_keypoint.shape[0]):
			
 
				+                keypoint = gt_keypoint[i]
			
 
				+                for j in range(int(keypoint.shape[0] / 2)):
			
 
				+                    x1 = round(keypoint[2 * j]).astype(np.int32)
			
 
				+                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
			
 
				+                    draw.ellipse(
			
 
				+                        (x1, y1, x1 + 5, y1 + 5),
			
 
				+                        fill='green',
			
 
				+                        outline='green')
			
 
				+        save_path = os.path.join(self.output_dir, out_file_name)
			
 
				+        image.save(save_path, quality=95)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Pad(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 size=None,
			
 
				+                 size_divisor=32,
			
 
				+                 pad_mode=0,
			
 
				+                 offsets=None,
			
 
				+                 fill_value=(127.5, 127.5, 127.5)):
			
 
				+        """
			
 
				+        Pad image to a specified size or multiple of size_divisor.
			
 
				+        Args:
			
 
				+            size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
			
 
				+            size_divisor (int): size divisor, default 32
			
 
				+            pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
			
 
				+                if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
			
 
				+            offsets (list): [offset_x, offset_y], specify offset while padding, only supported pad_mode=-1
			
 
				+            fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
			
 
				+        """
			
 
				+        super(Pad, self).__init__()
			
 
				+
			
 
				+        if not isinstance(size, (int, Sequence)):
			
 
				+            raise TypeError(
			
 
				+                "Type of target_size is invalid when random_size is True. \
			
 
				+                            Must be List, now is {}".format(type(size)))
			
 
				+
			
 
				+        if isinstance(size, int):
			
 
				+            size = [size, size]
			
 
				+
			
 
				+        assert pad_mode in [
			
 
				+            -1, 0, 1, 2
			
 
				+        ], 'currently only supports four modes [-1, 0, 1, 2]'
			
 
				+        if pad_mode == -1:
			
 
				+            assert offsets, 'if pad_mode is -1, offsets should not be None'
			
 
				+
			
 
				+        self.size = size
			
 
				+        self.size_divisor = size_divisor
			
 
				+        self.pad_mode = pad_mode
			
 
				+        self.fill_value = fill_value
			
 
				+        self.offsets = offsets
			
 
				+
			
 
				+    def apply_segm(self, segms, offsets, im_size, size):
			
 
				+        def _expand_poly(poly, x, y):
			
 
				+            expanded_poly = np.array(poly)
			
 
				+            expanded_poly[0::2] += x
			
 
				+            expanded_poly[1::2] += y
			
 
				+            return expanded_poly.tolist()
			
 
				+
			
 
				+        def _expand_rle(rle, x, y, height, width, h, w):
			
 
				+            if 'counts' in rle and type(rle['counts']) == list:
			
 
				+                rle = mask_util.frPyObjects(rle, height, width)
			
 
				+            mask = mask_util.decode(rle)
			
 
				+            expanded_mask = np.full((h, w), 0).astype(mask.dtype)
			
 
				+            expanded_mask[y:y + height, x:x + width] = mask
			
 
				+            rle = mask_util.encode(
			
 
				+                np.array(
			
 
				+                    expanded_mask, order='F', dtype=np.uint8))
			
 
				+            return rle
			
 
				+
			
 
				+        x, y = offsets
			
 
				+        height, width = im_size
			
 
				+        h, w = size
			
 
				+        expanded_segms = []
			
 
				+        for segm in segms:
			
 
				+            if is_poly(segm):
			
 
				+                # Polygon format
			
 
				+                expanded_segms.append(
			
 
				+                    [_expand_poly(poly, x, y) for poly in segm])
			
 
				+            else:
			
 
				+                # RLE format
			
 
				+                import pycocotools.mask as mask_util
			
 
				+                expanded_segms.append(
			
 
				+                    _expand_rle(segm, x, y, height, width, h, w))
			
 
				+        return expanded_segms
			
 
				+
			
 
				+    def apply_bbox(self, bbox, offsets):
			
 
				+        return bbox + np.array(offsets * 2, dtype=np.float32)
			
 
				+
			
 
				+    def apply_keypoint(self, keypoints, offsets):
			
 
				+        n = len(keypoints[0]) // 2
			
 
				+        return keypoints + np.array(offsets * n, dtype=np.float32)
			
 
				+
			
 
				+    def apply_image(self, image, offsets, im_size, size):
			
 
				+        x, y = offsets
			
 
				+        im_h, im_w = im_size
			
 
				+        h, w = size
			
 
				+        canvas = np.ones((h, w, 3), dtype=np.float32)
			
 
				+        canvas *= np.array(self.fill_value, dtype=np.float32)
			
 
				+        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
			
 
				+        return canvas
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        im = sample['image']
			
 
				+        im_h, im_w = im.shape[:2]
			
 
				+        if self.size:
			
 
				+            h, w = self.size
			
 
				+            assert (
			
 
				+                im_h < h and im_w < w
			
 
				+            ), '(h, w) of target size should be greater than (im_h, im_w)'
			
 
				+        else:
			
 
				+            h = np.ceil(im_h / self.size_divisor) * self.size_divisor
			
 
				+            w = np.ceil(im_w / self.size_divisor) * self.size_divisor
			
 
				+
			
 
				+        if h == im_h and w == im_w:
			
 
				+            return sample
			
 
				+
			
 
				+        if self.pad_mode == -1:
			
 
				+            offset_x, offset_y = self.offsets
			
 
				+        elif self.pad_mode == 0:
			
 
				+            offset_y, offset_x = 0, 0
			
 
				+        elif self.pad_mode == 1:
			
 
				+            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
			
 
				+        else:
			
 
				+            offset_y, offset_x = h - im_h, w - im_w
			
 
				+
			
 
				+        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
			
 
				+
			
 
				+        sample['image'] = self.apply_image(im, offsets, im_size, size)
			
 
				+
			
 
				+        if self.pad_mode == 0:
			
 
				+            return sample
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
			
 
				+
			
 
				+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
			
 
				+                                                im_size, size)
			
 
				+
			
 
				+        if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
			
 
				+            sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
			
 
				+                                                        offsets)
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Poly2Mask(BaseOperator):
			
 
				+    """
			
 
				+    gt poly to mask annotations
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(Poly2Mask, self).__init__()
			
 
				+        import pycocotools.mask as maskUtils
			
 
				+        self.maskutils = maskUtils
			
 
				+
			
 
				+    def _poly2mask(self, mask_ann, img_h, img_w):
			
 
				+        if isinstance(mask_ann, list):
			
 
				+            # polygon -- a single object might consist of multiple parts
			
 
				+            # we merge all parts into one mask rle code
			
 
				+            rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
			
 
				+            rle = self.maskutils.merge(rles)
			
 
				+        elif isinstance(mask_ann['counts'], list):
			
 
				+            # uncompressed RLE
			
 
				+            rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
			
 
				+        else:
			
 
				+            # rle
			
 
				+            rle = mask_ann
			
 
				+        mask = self.maskutils.decode(rle)
			
 
				+        return mask
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        assert 'gt_poly' in sample
			
 
				+        im_h = sample['h']
			
 
				+        im_w = sample['w']
			
 
				+        masks = [
			
 
				+            self._poly2mask(gt_poly, im_h, im_w)
			
 
				+            for gt_poly in sample['gt_poly']
			
 
				+        ]
			
 
				+        sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class Rbox2Poly(BaseOperator):
			
 
				+    """
			
 
				+    Convert rbbox format to poly format.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(Rbox2Poly, self).__init__()
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        assert 'gt_rbox' in sample
			
 
				+        assert sample['gt_rbox'].shape[1] == 5
			
 
				+        rrects = sample['gt_rbox']
			
 
				+        x_ctr = rrects[:, 0]
			
 
				+        y_ctr = rrects[:, 1]
			
 
				+        width = rrects[:, 2]
			
 
				+        height = rrects[:, 3]
			
 
				+        x1 = x_ctr - width / 2.0
			
 
				+        y1 = y_ctr - height / 2.0
			
 
				+        x2 = x_ctr + width / 2.0
			
 
				+        y2 = y_ctr + height / 2.0
			
 
				+        sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
			
 
				+        polys = bbox_utils.rbox2poly_np(rrects)
			
 
				+        sample['gt_rbox2poly'] = polys
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				@register_op
				class AugmentHSV(BaseOperator):
				    def __init__(self, fraction=0.50, is_bgr=True):
				        """
				        Randomly rescale the saturation and value channels of an image.

				        Args:
				            fraction (float): maximum relative change; each channel is
				                multiplied by a gain drawn from [1 - fraction, 1 + fraction).
				                Default: 0.5.
				            is_bgr (bool): whether the image is in BGR order (otherwise RGB).
				                Default: True.
				        """
				        super(AugmentHSV, self).__init__()
				        self.fraction = fraction
				        self.is_bgr = is_bgr

				    def apply(self, sample, context=None):
				        image = sample['image']
				        if self.is_bgr:
				            to_hsv, from_hsv = cv2.COLOR_BGR2HSV, cv2.COLOR_HSV2BGR
				        else:
				            to_hsv, from_hsv = cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB
				        hsv = cv2.cvtColor(image, to_hsv)

				        # Channel 1 is saturation, channel 2 is value; each gets its own gain.
				        for channel_idx in (1, 2):
				            channel = hsv[:, :, channel_idx].astype(np.float32)
				            gain = (random.random() * 2 - 1) * self.fraction + 1
				            channel *= gain
				            # Only an amplifying gain can push values past the 8-bit range.
				            if gain > 1:
				                np.clip(channel, a_min=0, a_max=255, out=channel)
				            hsv[:, :, channel_idx] = channel.astype(np.uint8)

				        # Convert back, writing the result into the original image buffer.
				        cv2.cvtColor(hsv, from_hsv, dst=image)

				        sample['image'] = image
				        return sample
			
 
				+
			
 
				+
			
 
				@register_op
				class Norm2PixelBbox(BaseOperator):
				    """
				    Scale bounding-box coordinates by the image size.

				    Even columns of ``gt_bbox`` are multiplied by the image width and odd
				    columns by the image height, i.e. normalized [0, 1] coordinates become
				    pixel coordinates. The array stored in the sample is updated in place.
				    """

				    def __init__(self):
				        super(Norm2PixelBbox, self).__init__()

				    def apply(self, sample, context=None):
				        assert 'gt_bbox' in sample
				        boxes = sample['gt_bbox']
				        img_h, img_w = sample['image'].shape[:2]
				        # x coordinates live in even columns, y coordinates in odd columns.
				        for first_col, extent in ((0, img_w), (1, img_h)):
				            boxes[:, first_col::2] = boxes[:, first_col::2] * extent
				        sample['gt_bbox'] = boxes
				        return sample
			
 
				+
			
 
				+
			
 
				@register_op
				class BboxCXCYWH2XYXY(BaseOperator):
				    """
				    Convert ``gt_bbox`` from center/size form to corner form.

				    [center_x, center_y, width, height] -> [x0, y0, x1, y1]

				    The conversion is done on a copy; the input array is not modified.
				    """

				    def __init__(self):
				        super(BboxCXCYWH2XYXY, self).__init__()

				    def apply(self, sample, context=None):
				        assert 'gt_bbox' in sample
				        source = sample['gt_bbox']
				        converted = source.copy()

				        centers = source[:, :2]
				        sizes = source[:, 2:4]
				        converted[:, :2] = centers - sizes / 2.
				        converted[:, 2:4] = centers + sizes / 2.

				        sample['gt_bbox'] = converted
				        return sample
			
 
				+
			
 
				+
			
 
				@register_op
				class RandomResizeCrop(BaseOperator):
				    """Random resize and crop image and bboxes.

				    With probability ``prob`` the sample is first resized to a randomly
				    chosen entry of ``resizes`` and then cropped to a randomly chosen entry
				    of ``cropsizes``; otherwise it is returned untouched.

				    Args:
				        resizes (list): resize image to one of resizes. if keep_ratio is True and mode is
				            'long', resize the image's long side to the maximum of target_size, if keep_ratio is
				            True and mode is 'short', resize the image's short side to the minimum of target_size.
				        cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
				        mode (str): resize mode, `long` or `short`. Details see resizes.
				        prob (float): probability of this op.
				        keep_ratio (bool): whether keep_ratio or not, default true
				        interp (int): the interpolation method
				        thresholds (list): iou thresholds for decide a valid bbox crop.
				        num_attempts (int): number of tries before giving up.
				        allow_no_crop (bool): allow return without actually cropping them.
				        cover_all_box (bool): ensure all bboxes are covered in the final crop.
				        is_mask_crop(bool): whether crop the segmentation.
				    """

				    def __init__(
				            self,
				            resizes,
				            cropsizes,
				            prob=0.5,
				            mode='short',
				            keep_ratio=True,
				            interp=cv2.INTER_LINEAR,
				            num_attempts=3,
				            cover_all_box=False,
				            allow_no_crop=False,
				            thresholds=[0.3, 0.5, 0.7],
				            is_mask_crop=False, ):
				        super(RandomResizeCrop, self).__init__()

				        self.resizes = resizes
				        self.cropsizes = cropsizes
				        self.prob = prob
				        self.mode = mode

				        # The helper operators are only used as holders of configuration and
				        # utility methods; the actual target sizes are supplied per call.
				        self.resizer = Resize(0, keep_ratio=keep_ratio, interp=interp)
				        self.croper = RandomCrop(
				            num_attempts=num_attempts,
				            cover_all_box=cover_all_box,
				            # Copy so that instances never share the mutable default list.
				            thresholds=list(thresholds),
				            allow_no_crop=allow_no_crop,
				            is_mask_crop=is_mask_crop)

				    def _format_size(self, size):
				        """Expand a single integer size to a ``(size, size)`` pair."""
				        if isinstance(size, Integral):
				            size = (size, size)
				        return size

				    def apply(self, sample, context=None):
				        """Resize then crop ``sample`` with probability ``self.prob``."""
				        if random.random() < self.prob:
				            _resize = self._format_size(random.choice(self.resizes))
				            _cropsize = self._format_size(random.choice(self.cropsizes))
				            sample = self._resize(
				                self.resizer,
				                sample,
				                size=_resize,
				                mode=self.mode,
				                context=context)
				            sample = self._random_crop(
				                self.croper, sample, size=_cropsize, context=context)
				        return sample

				    @staticmethod
				    def _random_crop(croper, sample, size, context=None):
				        """Crop ``sample`` to a random window whose sides lie within ``size``.

				        Args:
				            croper: operator providing ``thresholds``, ``num_attempts``,
				                ``cover_all_box``, ``is_mask_crop`` and the crop helper
				                methods used below (a ``RandomCrop`` instance in ``apply``).
				            sample (dict): sample holding at least ``image``, ``gt_bbox``
				                and ``gt_class``.
				            size (tuple): pair whose min/max bound the crop side lengths.
				            context: unused.

				        Returns:
				            dict: the cropped sample, or the unchanged sample when there are
				            no boxes or no acceptable crop window was found.
				        """
				        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
				            return sample

				        h, w = sample['image'].shape[:2]
				        gt_bbox = sample['gt_bbox']
				        min_crop = min(size)
				        max_crop = max(size)

				        # Try the IoU thresholds in random order.
				        thresholds = list(croper.thresholds)
				        np.random.shuffle(thresholds)

				        for thresh in thresholds:
				            found = False
				            for _ in range(croper.num_attempts):

				                crop_h = random.randint(min_crop, min(h, max_crop))
				                crop_w = random.randint(min_crop, min(w, max_crop))

				                crop_y = random.randint(0, h - crop_h)
				                crop_x = random.randint(0, w - crop_w)

				                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
				                iou = croper._iou_matrix(
				                    gt_bbox, np.array(
				                        [crop_box], dtype=np.float32))
				                if iou.max() < thresh:
				                    continue

				                if croper.cover_all_box and iou.min() < thresh:
				                    continue

				                cropped_box, valid_ids = croper._crop_box_with_center_constraint(
				                    gt_bbox, np.array(
				                        crop_box, dtype=np.float32))
				                if valid_ids.size > 0:
				                    found = True
				                    break

				            if found:
				                if croper.is_mask_crop and 'gt_poly' in sample and len(sample[
				                        'gt_poly']) > 0:
				                    crop_polys = croper.crop_segms(
				                        sample['gt_poly'],
				                        valid_ids,
				                        np.array(
				                            crop_box, dtype=np.int64),
				                        h,
				                        w)
				                    if [] in crop_polys:
				                        # Drop instances whose polygons vanished in the crop.
				                        delete_id = list()
				                        valid_polys = list()
				                        for idx, crop_poly in enumerate(crop_polys):
				                            if crop_poly == []:
				                                delete_id.append(idx)
				                            else:
				                                valid_polys.append(crop_poly)
				                        valid_ids = np.delete(valid_ids, delete_id)
				                        if len(valid_polys) == 0:
				                            return sample
				                        sample['gt_poly'] = valid_polys
				                    else:
				                        sample['gt_poly'] = crop_polys

				                if 'gt_segm' in sample:
				                    sample['gt_segm'] = croper._crop_segm(sample['gt_segm'],
				                                                          crop_box)
				                    sample['gt_segm'] = np.take(
				                        sample['gt_segm'], valid_ids, axis=0)

				                sample['image'] = croper._crop_image(sample['image'], crop_box)
				                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
				                sample['gt_class'] = np.take(
				                    sample['gt_class'], valid_ids, axis=0)
				                if 'gt_score' in sample:
				                    sample['gt_score'] = np.take(
				                        sample['gt_score'], valid_ids, axis=0)

				                if 'is_crowd' in sample:
				                    sample['is_crowd'] = np.take(
				                        sample['is_crowd'], valid_ids, axis=0)
				                return sample

				        return sample

				    @staticmethod
				    def _resize(resizer, sample, size, mode='short', context=None):
				        """Resize the image and its annotations to ``size``.

				        Args:
				            resizer: operator providing ``keep_ratio``, ``interp`` and the
				                ``apply_image`` / ``apply_bbox`` / ``apply_segm`` helpers
				                (a ``Resize`` instance in ``apply``).
				            sample (dict): sample with an ``image`` ndarray of rank 3.
				            size (tuple): target size; read as (height, width) when
				                ``resizer.keep_ratio`` is false.
				            mode (str): ``'long'`` takes the smaller of the two candidate
				                scales, anything else takes the larger one.
				            context: unused.

				        Returns:
				            dict: the sample with the image, ``im_shape``, ``scale_factor``
				            and any present annotations rescaled.

				        Raises:
				            TypeError: if the image is not a numpy array.
				            ImageError: if the image is not 3-dimensional.
				        """
				        im = sample['image']
				        target_size = size

				        if not isinstance(im, np.ndarray):
				            raise TypeError("{}: image type is not numpy.".format(resizer))
				        if len(im.shape) != 3:
				            raise ImageError('{}: image is not 3-dimensional.'.format(resizer))

				        # apply image
				        im_shape = im.shape
				        if resizer.keep_ratio:

				            im_size_min = np.min(im_shape[0:2])
				            im_size_max = np.max(im_shape[0:2])

				            target_size_min = np.min(target_size)
				            target_size_max = np.max(target_size)

				            if mode == 'long':
				                im_scale = min(target_size_min / im_size_min,
				                               target_size_max / im_size_max)
				            else:
				                im_scale = max(target_size_min / im_size_min,
				                               target_size_max / im_size_max)

				            resize_h = im_scale * float(im_shape[0])
				            resize_w = im_scale * float(im_shape[1])

				            im_scale_x = im_scale
				            im_scale_y = im_scale
				        else:
				            resize_h, resize_w = target_size
				            im_scale_y = resize_h / im_shape[0]
				            im_scale_x = resize_w / im_shape[1]

				        im = resizer.apply_image(sample['image'], [im_scale_x, im_scale_y])
				        sample['image'] = im
				        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
				        if 'scale_factor' in sample:
				            # Accumulate on top of any scale applied by earlier operators.
				            scale_factor = sample['scale_factor']
				            sample['scale_factor'] = np.asarray(
				                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
				                dtype=np.float32)
				        else:
				            sample['scale_factor'] = np.asarray(
				                [im_scale_y, im_scale_x], dtype=np.float32)

				        # apply bbox
				        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
				            sample['gt_bbox'] = resizer.apply_bbox(sample['gt_bbox'],
				                                                   [im_scale_x, im_scale_y],
				                                                   [resize_w, resize_h])

				        # apply rbox
				        if 'gt_rbox2poly' in sample:
				            poly_len = np.array(sample['gt_rbox2poly']).shape[1]
				            if poly_len != 8:
				                # Report the per-box length that was actually checked, not
				                # the number of boxes.
				                logger.warn(
				                    "gt_rbox2poly's length shoule be 8, but actually is {}".
				                    format(poly_len))
				            sample['gt_rbox2poly'] = resizer.apply_bbox(
				                sample['gt_rbox2poly'], [im_scale_x, im_scale_y],
				                [resize_w, resize_h])

				        # apply polygon
				        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
				            sample['gt_poly'] = resizer.apply_segm(
				                sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])

				        # apply semantic
				        if 'semantic' in sample and sample['semantic']:
				            semantic = sample['semantic']
				            semantic = cv2.resize(
				                semantic.astype('float32'),
				                None,
				                None,
				                fx=im_scale_x,
				                fy=im_scale_y,
				                interpolation=resizer.interp)
				            semantic = np.asarray(semantic).astype('int32')
				            semantic = np.expand_dims(semantic, 0)
				            sample['semantic'] = semantic

				        # apply gt_segm
				        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
				            masks = [
				                cv2.resize(
				                    gt_segm,
				                    None,
				                    None,
				                    fx=im_scale_x,
				                    fy=im_scale_y,
				                    interpolation=cv2.INTER_NEAREST)
				                for gt_segm in sample['gt_segm']
				            ]
				            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)

				        return sample
			
 
				+
			
 
				+
			
 
				@register_op
				class RandomSelect(BaseOperator):
				    """
				    Apply one of two transform pipelines, picked at random.

				    ``transforms1`` is used with probability ``p`` and ``transforms2``
				    otherwise. Both are wrapped in ``Compose`` at construction time.

				    The code is based on https://github.com/facebookresearch/detr/blob/main/datasets/transforms.py

				    """

				    def __init__(self, transforms1, transforms2, p=0.5):
				        super(RandomSelect, self).__init__()
				        self.transforms1 = Compose(transforms1)
				        self.transforms2 = Compose(transforms2)
				        self.p = p

				    def apply(self, sample, context=None):
				        use_first = random.random() < self.p
				        pipeline = self.transforms1 if use_first else self.transforms2
				        return pipeline(sample)
			
 
				+
			
 
				+
			
 
				@register_op
				class RandomShortSideResize(BaseOperator):
				    def __init__(self,
				                 short_side_sizes,
				                 max_size=None,
				                 interp=cv2.INTER_LINEAR,
				                 random_interp=False):
				        """
				        Resize the image so that its short side matches a randomly chosen
				        target, keeping the aspect ratio. When max_size is given, the target
				        is reduced so the long side does not exceed max_size.

				        Args:
				            short_side_sizes (list|tuple): Candidate short side sizes.
				            max_size (int): Upper bound for the long side after resize.
				            interp (int): The interpolation method.
				            random_interp (bool): Whether to pick the interpolation method
				                at random for each sample.
				        """
				        super(RandomShortSideResize, self).__init__()

				        assert isinstance(short_side_sizes,
				                          Sequence), "short_side_sizes must be List or Tuple"

				        self.short_side_sizes = short_side_sizes
				        self.max_size = max_size
				        self.interp = interp
				        self.random_interp = random_interp
				        # Pool used when random_interp is enabled.
				        self.interps = [
				            cv2.INTER_NEAREST,
				            cv2.INTER_LINEAR,
				            cv2.INTER_AREA,
				            cv2.INTER_CUBIC,
				            cv2.INTER_LANCZOS4,
				        ]

				    def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
				        """Return the (width, height) that puts the short side at ``size``."""
				        height, width = image_shape
				        if max_size is not None:
				            short_edge = float(min((width, height)))
				            long_edge = float(max((width, height)))
				            # Shrink the target if the long side would overshoot max_size.
				            if long_edge / short_edge * size > max_size:
				                size = int(round(max_size * short_edge / long_edge))

				        # Nothing to do when the short side already has the target length.
				        if (width <= height and width == size) or (height <= width and
				                                                   height == size):
				            return (width, height)

				        if width < height:
				            return (size, int(size * height / width))
				        return (int(size * width / height), size)

				    def resize(self,
				               sample,
				               target_size,
				               max_size=None,
				               interp=cv2.INTER_LINEAR):
				        """Resize the image and every annotation present in ``sample``."""
				        image = sample['image']
				        if not isinstance(image, np.ndarray):
				            raise TypeError("{}: image type is not numpy.".format(self))
				        if len(image.shape) != 3:
				            raise ImageError('{}: image is not 3-dimensional.'.format(self))

				        # out_size is (width, height), the order cv2.resize expects.
				        out_size = self.get_size_with_aspect_ratio(image.shape[:2],
				                                                   target_size, max_size)
				        scale_y = out_size[1] / image.shape[0]
				        scale_x = out_size[0] / image.shape[1]

				        sample['image'] = cv2.resize(image, out_size, interpolation=interp)
				        sample['im_shape'] = np.asarray(out_size[::-1], dtype=np.float32)

				        # Fold the new scale into any scale factor already recorded.
				        factors = [scale_y, scale_x]
				        if 'scale_factor' in sample:
				            previous = sample['scale_factor']
				            factors = [previous[0] * scale_y, previous[1] * scale_x]
				        sample['scale_factor'] = np.asarray(factors, dtype=np.float32)

				        # boxes
				        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
				            sample['gt_bbox'] = self.apply_bbox(
				                sample['gt_bbox'], [scale_x, scale_y], out_size)
				        # polygons / RLE masks
				        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
				            sample['gt_poly'] = self.apply_segm(
				                sample['gt_poly'], image.shape[:2], [scale_x, scale_y])
				        # semantic map
				        if 'semantic' in sample and sample['semantic']:
				            resized = cv2.resize(
				                sample['semantic'].astype('float32'),
				                out_size,
				                interpolation=self.interp)
				            resized = np.asarray(resized).astype('int32')
				            sample['semantic'] = np.expand_dims(resized, 0)
				        # instance masks
				        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
				            resized_masks = []
				            for mask in sample['gt_segm']:
				                resized_masks.append(
				                    cv2.resize(
				                        mask, out_size, interpolation=cv2.INTER_NEAREST))
				            sample['gt_segm'] = np.asarray(resized_masks).astype(np.uint8)
				        return sample

				    def apply_bbox(self, bbox, scale, size):
				        """Scale boxes in place and clip them to the resized image."""
				        scale_x, scale_y = scale
				        limit_w, limit_h = size
				        # Even columns hold x coordinates, odd columns hold y coordinates.
				        for first_col, factor, limit in ((0, scale_x, limit_w),
				                                         (1, scale_y, limit_h)):
				            bbox[:, first_col::2] *= factor
				            bbox[:, first_col::2] = np.clip(bbox[:, first_col::2], 0, limit)
				        return bbox.astype('float32')

				    def apply_segm(self, segms, im_size, scale):
				        """Rescale segmentations given as polygons or RLE dicts."""
				        im_h, im_w = im_size
				        scale_x, scale_y = scale

				        def _scale_polygon(points):
				            scaled = np.array(points).astype('float32')
				            scaled[0::2] *= scale_x
				            scaled[1::2] *= scale_y
				            return scaled.tolist()

				        def _scale_rle(rle):
				            # Imported lazily so pycocotools is only needed for RLE input.
				            import pycocotools.mask as mask_util
				            if 'counts' in rle and type(rle['counts']) == list:
				                rle = mask_util.frPyObjects(rle, im_h, im_w)

				            mask = mask_util.decode(rle)
				            mask = cv2.resize(
				                mask,
				                None,
				                None,
				                fx=scale_x,
				                fy=scale_y,
				                interpolation=self.interp)
				            return mask_util.encode(
				                np.array(
				                    mask, order='F', dtype=np.uint8))

				        resized_segms = []
				        for segm in segms:
				            if is_poly(segm):
				                resized_segms.append(
				                    [_scale_polygon(poly) for poly in segm])
				            else:
				                resized_segms.append(_scale_rle(segm))

				        return resized_segms

				    def apply(self, sample, context=None):
				        short_side = random.choice(self.short_side_sizes)
				        if self.random_interp:
				            chosen_interp = random.choice(self.interps)
				        else:
				            chosen_interp = self.interp

				        return self.resize(sample, short_side, self.max_size, chosen_interp)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class RandomSizeCrop(BaseOperator):
			
 
				+    """
			
 
				+    Cut the image randomly according to `min_size` and `max_size`
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, min_size, max_size):
			
 
				+        super(RandomSizeCrop, self).__init__()
			
 
				+        self.min_size = min_size
			
 
				+        self.max_size = max_size
			
 
				+
			
 
				+        from paddle.vision.transforms.functional import crop as paddle_crop
			
 
				+        self.paddle_crop = paddle_crop
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def get_crop_params(img_shape, output_size):
			
 
				+        """Get parameters for ``crop`` for a random crop.
			
 
				+        Args:
			
 
				+            img_shape (list|tuple): Image's height and width.
			
 
				+            output_size (list|tuple): Expected output size of the crop.
			
 
				+        Returns:
			
 
				+            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
			
 
				+        """
			
 
				+        h, w = img_shape
			
 
				+        th, tw = output_size
			
 
				+
			
 
				+        if h + 1 < th or w + 1 < tw:
			
 
				+            raise ValueError(
			
 
				+                "Required crop size {} is larger then input image size {}".
			
 
				+                format((th, tw), (h, w)))
			
 
				+
			
 
				+        if w == tw and h == th:
			
 
				+            return 0, 0, h, w
			
 
				+
			
 
				+        i = random.randint(0, h - th + 1)
			
 
				+        j = random.randint(0, w - tw + 1)
			
 
				+        return i, j, th, tw
			
 
				+
			
 
				+    def crop(self, sample, region):
			
 
				+        image_shape = sample['image'].shape[:2]
			
 
				+        sample['image'] = self.paddle_crop(sample['image'], *region)
			
 
				+
			
 
				+        keep_index = None
			
 
				+        # apply bbox
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
			
 
				+            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], region)
			
 
				+            bbox = sample['gt_bbox'].reshape([-1, 2, 2])
			
 
				+            area = (bbox[:, 1, :] - bbox[:, 0, :]).prod(axis=1)
			
 
				+            keep_index = np.where(area > 0)[0]
			
 
				+            sample['gt_bbox'] = sample['gt_bbox'][keep_index] if len(
			
 
				+                keep_index) > 0 else np.zeros(
			
 
				+                    [0, 4], dtype=np.float32)
			
 
				+            sample['gt_class'] = sample['gt_class'][keep_index] if len(
			
 
				+                keep_index) > 0 else np.zeros(
			
 
				+                    [0, 1], dtype=np.float32)
			
 
				+            if 'gt_score' in sample:
			
 
				+                sample['gt_score'] = sample['gt_score'][keep_index] if len(
			
 
				+                    keep_index) > 0 else np.zeros(
			
 
				+                        [0, 1], dtype=np.float32)
			
 
				+            if 'is_crowd' in sample:
			
 
				+                sample['is_crowd'] = sample['is_crowd'][keep_index] if len(
			
 
				+                    keep_index) > 0 else np.zeros(
			
 
				+                        [0, 1], dtype=np.float32)
			
 
				+
			
 
				+        # apply polygon
			
 
				+        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
			
 
				+            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], region,
			
 
				+                                                image_shape)
			
 
				+            if keep_index is not None:
			
 
				+                sample['gt_poly'] = sample['gt_poly'][keep_index]
			
 
				+        # apply gt_segm
			
 
				+        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
			
 
				+            i, j, h, w = region
			
 
				+            sample['gt_segm'] = sample['gt_segm'][:, i:i + h, j:j + w]
			
 
				+            if keep_index is not None:
			
 
				+                sample['gt_segm'] = sample['gt_segm'][keep_index]
			
 
				+
			
 
				+        return sample
			
 
				+
			
 
				+    def apply_bbox(self, bbox, region):
			
 
				+        i, j, h, w = region
			
 
				+        region_size = np.asarray([w, h])
			
 
				+        crop_bbox = bbox - np.asarray([j, i, j, i])
			
 
				+        crop_bbox = np.minimum(crop_bbox.reshape([-1, 2, 2]), region_size)
			
 
				+        crop_bbox = crop_bbox.clip(min=0)
			
 
				+        return crop_bbox.reshape([-1, 4]).astype('float32')
			
 
				+
			
 
				+    def apply_segm(self, segms, region, image_shape):
			
 
				+        def _crop_poly(segm, crop):
			
 
				+            xmin, ymin, xmax, ymax = crop
			
 
				+            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
			
 
				+            crop_p = np.array(crop_coord).reshape(4, 2)
			
 
				+            crop_p = Polygon(crop_p)
			
 
				+
			
 
				+            crop_segm = list()
			
 
				+            for poly in segm:
			
 
				+                poly = np.array(poly).reshape(len(poly) // 2, 2)
			
 
				+                polygon = Polygon(poly)
			
 
				+                if not polygon.is_valid:
			
 
				+                    exterior = polygon.exterior
			
 
				+                    multi_lines = exterior.intersection(exterior)
			
 
				+                    polygons = shapely.ops.polygonize(multi_lines)
			
 
				+                    polygon = MultiPolygon(polygons)
			
 
				+                multi_polygon = list()
			
 
				+                if isinstance(polygon, MultiPolygon):
			
 
				+                    multi_polygon = copy.deepcopy(polygon)
			
 
				+                else:
			
 
				+                    multi_polygon.append(copy.deepcopy(polygon))
			
 
				+                for per_polygon in multi_polygon:
			
 
				+                    inter = per_polygon.intersection(crop_p)
			
 
				+                    if not inter:
			
 
				+                        continue
			
 
				+                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
			
 
				+                        for part in inter:
			
 
				+                            if not isinstance(part, Polygon):
			
 
				+                                continue
			
 
				+                            part = np.squeeze(
			
 
				+                                np.array(part.exterior.coords[:-1]).reshape(
			
 
				+                                    1, -1))
			
 
				+                            part[0::2] -= xmin
			
 
				+                            part[1::2] -= ymin
			
 
				+                            crop_segm.append(part.tolist())
			
 
				+                    elif isinstance(inter, Polygon):
			
 
				+                        crop_poly = np.squeeze(
			
 
				+                            np.array(inter.exterior.coords[:-1]).reshape(1,
			
 
				+                                                                         -1))
			
 
				+                        crop_poly[0::2] -= xmin
			
 
				+                        crop_poly[1::2] -= ymin
			
 
				+                        crop_segm.append(crop_poly.tolist())
			
 
				+                    else:
			
 
				+                        continue
			
 
				+            return crop_segm
			
 
				+
			
 
				+        def _crop_rle(rle, crop, height, width):
			
 
				+            if 'counts' in rle and type(rle['counts']) == list:
			
 
				+                rle = mask_util.frPyObjects(rle, height, width)
			
 
				+            mask = mask_util.decode(rle)
			
 
				+            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
			
 
				+            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
			
 
				+            return rle
			
 
				+
			
 
				+        i, j, h, w = region
			
 
				+        crop = [j, i, j + w, i + h]
			
 
				+        height, width = image_shape
			
 
				+        crop_segms = []
			
 
				+        for segm in segms:
			
 
				+            if is_poly(segm):
			
 
				+                import copy
			
 
				+                import shapely.ops
			
 
				+                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
			
 
				+                # Polygon format
			
 
				+                crop_segms.append(_crop_poly(segm, crop))
			
 
				+            else:
			
 
				+                # RLE format
			
 
				+                import pycocotools.mask as mask_util
			
 
				+                crop_segms.append(_crop_rle(segm, crop, height, width))
			
 
				+        return crop_segms
			
 
				+
			
 
    def apply(self, sample, context=None):
        """Crop ``sample`` to a window of random height and width.

        Each side length is drawn with ``random.randint`` (both ends
        inclusive) between ``self.min_size`` and the smaller of
        ``self.max_size`` and the matching image dimension.

        Args:
            sample (dict): record whose ``'image'`` entry exposes ``.shape``.
            context: not used by this method.

        Returns:
            The value returned by ``self.crop(sample, region)``.
        """
        shape = sample['image'].shape
        # Height is drawn before width, so the random stream is consumed in
        # a fixed order.
        crop_h = random.randint(self.min_size, min(shape[0], self.max_size))
        crop_w = random.randint(self.min_size, min(shape[1], self.max_size))

        region = self.get_crop_params(shape[:2], [crop_h, crop_w])
        return self.crop(sample, region)
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class WarpAffine(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 keep_res=False,
			
 
				+                 pad=31,
			
 
				+                 input_h=512,
			
 
				+                 input_w=512,
			
 
				+                 scale=0.4,
			
 
				+                 shift=0.1):
			
 
				+        """WarpAffine
			
 
				+        Warp affine the image
			
 
				+
			
 
				+        The code is based on https://github.com/xingyizhou/CenterNet/blob/master/src/lib/datasets/sample/ctdet.py
			
 
				+
			
 
				+
			
 
				+        """
			
 
				+        super(WarpAffine, self).__init__()
			
 
				+        self.keep_res = keep_res
			
 
				+        self.pad = pad
			
 
				+        self.input_h = input_h
			
 
				+        self.input_w = input_w
			
 
				+        self.scale = scale
			
 
				+        self.shift = shift
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        img = sample['image']
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
			
 
				+            return sample
			
 
				+
			
 
				+        h, w = img.shape[:2]
			
 
				+
			
 
				+        if self.keep_res:
			
 
				+            input_h = (h | self.pad) + 1
			
 
				+            input_w = (w | self.pad) + 1
			
 
				+            s = np.array([input_w, input_h], dtype=np.float32)
			
 
				+            c = np.array([w // 2, h // 2], dtype=np.float32)
			
 
				+
			
 
				+        else:
			
 
				+            s = max(h, w) * 1.0
			
 
				+            input_h, input_w = self.input_h, self.input_w
			
 
				+            c = np.array([w / 2., h / 2.], dtype=np.float32)
			
 
				+
			
 
				+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
			
 
				+        img = cv2.resize(img, (w, h))
			
 
				+        inp = cv2.warpAffine(
			
 
				+            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
			
 
				+        sample['image'] = inp
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class FlipWarpAffine(BaseOperator):
			
 
				+    def __init__(self,
			
 
				+                 keep_res=False,
			
 
				+                 pad=31,
			
 
				+                 input_h=512,
			
 
				+                 input_w=512,
			
 
				+                 not_rand_crop=False,
			
 
				+                 scale=0.4,
			
 
				+                 shift=0.1,
			
 
				+                 flip=0.5,
			
 
				+                 is_scale=True,
			
 
				+                 use_random=True):
			
 
				+        """FlipWarpAffine
			
 
				+        1. Random Crop
			
 
				+        2. Flip the image horizontal
			
 
				+        3. Warp affine the image
			
 
				+        """
			
 
				+        super(FlipWarpAffine, self).__init__()
			
 
				+        self.keep_res = keep_res
			
 
				+        self.pad = pad
			
 
				+        self.input_h = input_h
			
 
				+        self.input_w = input_w
			
 
				+        self.not_rand_crop = not_rand_crop
			
 
				+        self.scale = scale
			
 
				+        self.shift = shift
			
 
				+        self.flip = flip
			
 
				+        self.is_scale = is_scale
			
 
				+        self.use_random = use_random
			
 
				+
			
 
				+    def apply(self, sample, context=None):
			
 
				+        img = sample['image']
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				+        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
			
 
				+            return sample
			
 
				+
			
 
				+        h, w = img.shape[:2]
			
 
				+
			
 
				+        if self.keep_res:
			
 
				+            input_h = (h | self.pad) + 1
			
 
				+            input_w = (w | self.pad) + 1
			
 
				+            s = np.array([input_w, input_h], dtype=np.float32)
			
 
				+            c = np.array([w // 2, h // 2], dtype=np.float32)
			
 
				+
			
 
				+        else:
			
 
				+            s = max(h, w) * 1.0
			
 
				+            input_h, input_w = self.input_h, self.input_w
			
 
				+            c = np.array([w / 2., h / 2.], dtype=np.float32)
			
 
				+
			
 
				+        if self.use_random:
			
 
				+            gt_bbox = sample['gt_bbox']
			
 
				+            if not self.not_rand_crop:
			
 
				+                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
			
 
				+                w_border = get_border(128, w)
			
 
				+                h_border = get_border(128, h)
			
 
				+                c[0] = np.random.randint(low=w_border, high=w - w_border)
			
 
				+                c[1] = np.random.randint(low=h_border, high=h - h_border)
			
 
				+            else:
			
 
				+                sf = self.scale
			
 
				+                cf = self.shift
			
 
				+                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
			
 
				+                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
			
 
				+                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
			
 
				+
			
 
				+            if np.random.random() < self.flip:
			
 
				+                img = img[:, ::-1, :]
			
 
				+                c[0] = w - c[0] - 1
			
 
				+                oldx1 = gt_bbox[:, 0].copy()
			
 
				+                oldx2 = gt_bbox[:, 2].copy()
			
 
				+                gt_bbox[:, 0] = w - oldx2 - 1
			
 
				+                gt_bbox[:, 2] = w - oldx1 - 1
			
 
				+            sample['gt_bbox'] = gt_bbox
			
 
				+
			
 
				+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
			
 
				+        if not self.use_random:
			
 
				+            img = cv2.resize(img, (w, h))
			
 
				+        inp = cv2.warpAffine(
			
 
				+            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
			
 
				+        if self.is_scale:
			
 
				+            inp = (inp.astype(np.float32) / 255.)
			
 
				+        sample['image'] = inp
			
 
				+        sample['center'] = c
			
 
				+        sample['scale'] = s
			
 
				+        return sample
			
 
				+
			
 
				+
			
 
				+@register_op
			
 
				+class CenterRandColor(BaseOperator):
			
 
				+    """Random color for CenterNet series models.
			
 
				+    Args:
			
 
				+        saturation (float): saturation settings.
			
 
				+        contrast (float): contrast settings.
			
 
				+        brightness (float): brightness settings.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, saturation=0.4, contrast=0.4, brightness=0.4):
			
 
				+        super(CenterRandColor, self).__init__()
			
 
				+        self.saturation = saturation
			
 
				+        self.contrast = contrast
			
 
				+        self.brightness = brightness
			
 
				+
			
 
				+    def apply_saturation(self, img, img_gray):
			
 
				+        alpha = 1. + np.random.uniform(
			
 
				+            low=-self.saturation, high=self.saturation)
			
 
				+        self._blend(alpha, img, img_gray[:, :, None])
			
 
				+        return img
			
 
				+
			
 
				+    def apply_contrast(self, img, img_gray):
			
 
				+        alpha = 1. + np.random.uniform(low=-self.contrast, high=self.contrast)
			
 
				+        img_mean = img_gray.mean()
			
 
				+        self._blend(alpha, img, img_mean)
			
 
				+        return img
			
 
				+
			
 
				+    def apply_brightness(self, img, img_gray):
			
 
				+        alpha = 1 + np.random.uniform(
			
 
				+            low=-self.brightness, high=self.brightness)
			
 
				+        img *= alpha
			
 
				+        return img
			
 
				+
			
 
				+    def _blend(self, alpha, img, img_mean):
			
 
				+        img *= alpha
			
 
				+        img_mean *= (1 - alpha)
			
 
				+        img += img_mean
			
 
				+
			
 
				+    def __call__(self, sample, context=None):
			
 
				+        img = sample['image']
			
 
				+        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
			
 
				+        functions = [
			
 
				+            self.apply_brightness,
			
 
				+            self.apply_contrast,
			
 
				+            self.apply_saturation,
			
 
				+        ]
			
 
				+        distortions = np.random.permutation(functions)
			
 
				+        for func in distortions:
			
 
				+            img = func(img, img_gray)
			
 
				+        sample['image'] = img
			
 
				+        return sample
			
--- a/paddlers/models/ppdet/engine/__init__.py
+++ b/paddlers/models/ppdet/engine/__init__.py
@@ -0,0 +1,30 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import trainer
			
 
				+from .trainer import *
			
 
				+
			
 
				+from . import callbacks
			
 
				+from .callbacks import *
			
 
				+
			
 
				+from . import env
			
 
				+from .env import *
			
 
				+
			
 
				+__all__ = trainer.__all__ \
			
 
				+        + callbacks.__all__ \
			
 
				+        + env.__all__
			
 
				+
			
 
				+from . import tracker
			
 
				+from .tracker import *
			
 
				+__all__ = __all__ + tracker.__all__
			
--- a/paddlers/models/ppdet/engine/callbacks.py
+++ b/paddlers/models/ppdet/engine/callbacks.py
@@ -0,0 +1,340 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import datetime
			
 
				+import six
			
 
				+import copy
			
 
				+import json
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.distributed as dist
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.checkpoint import save_model
			
 
				+from paddlers.models.ppdet.metrics import get_infer_results
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger('ppdet.engine')
			
 
				+
			
 
				+__all__ = [
			
 
				+    'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer',
			
 
				+    'VisualDLWriter', 'SniperProposalsGenerator'
			
 
				+]
			
 
				+
			
 
				+
			
 
				+class Callback(object):
			
 
				+    def __init__(self, model):
			
 
				+        self.model = model
			
 
				+
			
 
				+    def on_step_begin(self, status):
			
 
				+        pass
			
 
				+
			
 
				+    def on_step_end(self, status):
			
 
				+        pass
			
 
				+
			
 
				+    def on_epoch_begin(self, status):
			
 
				+        pass
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        pass
			
 
				+
			
 
				+    def on_train_begin(self, status):
			
 
				+        pass
			
 
				+
			
 
				+    def on_train_end(self, status):
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+class ComposeCallback(object):
			
 
				+    def __init__(self, callbacks):
			
 
				+        callbacks = [c for c in list(callbacks) if c is not None]
			
 
				+        for c in callbacks:
			
 
				+            assert isinstance(
			
 
				+                c, Callback), "callback should be subclass of Callback"
			
 
				+        self._callbacks = callbacks
			
 
				+
			
 
				+    def on_step_begin(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_step_begin(status)
			
 
				+
			
 
				+    def on_step_end(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_step_end(status)
			
 
				+
			
 
				+    def on_epoch_begin(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_epoch_begin(status)
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_epoch_end(status)
			
 
				+
			
 
				+    def on_train_begin(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_train_begin(status)
			
 
				+
			
 
				+    def on_train_end(self, status):
			
 
				+        for c in self._callbacks:
			
 
				+            c.on_train_end(status)
			
 
				+
			
 
				+
			
 
				+class LogPrinter(Callback):
			
 
				+    def __init__(self, model):
			
 
				+        super(LogPrinter, self).__init__(model)
			
 
				+
			
 
				+    def on_step_end(self, status):
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            mode = status['mode']
			
 
				+            if mode == 'train':
			
 
				+                epoch_id = status['epoch_id']
			
 
				+                step_id = status['step_id']
			
 
				+                steps_per_epoch = status['steps_per_epoch']
			
 
				+                training_staus = status['training_staus']
			
 
				+                batch_time = status['batch_time']
			
 
				+                data_time = status['data_time']
			
 
				+
			
 
				+                epoches = self.model.cfg.epoch
			
 
				+                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
			
 
				+                ))]['batch_size']
			
 
				+
			
 
				+                logs = training_staus.log()
			
 
				+                space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
			
 
				+                if step_id % self.model.cfg.log_iter == 0:
			
 
				+                    eta_steps = (epoches - epoch_id
			
 
				+                                 ) * steps_per_epoch - step_id
			
 
				+                    eta_sec = eta_steps * batch_time.global_avg
			
 
				+                    eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
			
 
				+                    ips = float(batch_size) / batch_time.avg
			
 
				+                    fmt = ' '.join([
			
 
				+                        'Epoch: [{}]',
			
 
				+                        '[{' + space_fmt + '}/{}]',
			
 
				+                        'learning_rate: {lr:.6f}',
			
 
				+                        '{meters}',
			
 
				+                        'eta: {eta}',
			
 
				+                        'batch_cost: {btime}',
			
 
				+                        'data_cost: {dtime}',
			
 
				+                        'ips: {ips:.4f} images/s',
			
 
				+                    ])
			
 
				+                    fmt = fmt.format(
			
 
				+                        epoch_id,
			
 
				+                        step_id,
			
 
				+                        steps_per_epoch,
			
 
				+                        lr=status['learning_rate'],
			
 
				+                        meters=logs,
			
 
				+                        eta=eta_str,
			
 
				+                        btime=str(batch_time),
			
 
				+                        dtime=str(data_time),
			
 
				+                        ips=ips)
			
 
				+                    logger.info(fmt)
			
 
				+            if mode == 'eval':
			
 
				+                step_id = status['step_id']
			
 
				+                if step_id % 100 == 0:
			
 
				+                    logger.info("Eval iter: {}".format(step_id))
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            mode = status['mode']
			
 
				+            if mode == 'eval':
			
 
				+                sample_num = status['sample_num']
			
 
				+                cost_time = status['cost_time']
			
 
				+                logger.info('Total sample number: {}, averge FPS: {}'.format(
			
 
				+                    sample_num, sample_num / cost_time))
			
 
				+
			
 
				+
			
 
				+class Checkpointer(Callback):
			
 
				+    def __init__(self, model):
			
 
				+        super(Checkpointer, self).__init__(model)
			
 
				+        cfg = self.model.cfg
			
 
				+        self.best_ap = 0.
			
 
				+        self.save_dir = os.path.join(self.model.cfg.save_dir,
			
 
				+                                     self.model.cfg.filename)
			
 
				+        if hasattr(self.model.model, 'student_model'):
			
 
				+            self.weight = self.model.model.student_model
			
 
				+        else:
			
 
				+            self.weight = self.model.model
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        # Checkpointer only performed during training
			
 
				+        mode = status['mode']
			
 
				+        epoch_id = status['epoch_id']
			
 
				+        weight = None
			
 
				+        save_name = None
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            if mode == 'train':
			
 
				+                end_epoch = self.model.cfg.epoch
			
 
				+                if (
			
 
				+                        epoch_id + 1
			
 
				+                ) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
			
 
				+                    save_name = str(
			
 
				+                        epoch_id
			
 
				+                    ) if epoch_id != end_epoch - 1 else "model_final"
			
 
				+                    weight = self.weight
			
 
				+            elif mode == 'eval':
			
 
				+                if 'save_best_model' in status and status['save_best_model']:
			
 
				+                    for metric in self.model._metrics:
			
 
				+                        map_res = metric.get_results()
			
 
				+                        if 'bbox' in map_res:
			
 
				+                            key = 'bbox'
			
 
				+                        elif 'keypoint' in map_res:
			
 
				+                            key = 'keypoint'
			
 
				+                        else:
			
 
				+                            key = 'mask'
			
 
				+                        if key not in map_res:
			
 
				+                            logger.warning("Evaluation results empty, this may be due to " \
			
 
				+                                        "training iterations being too few or not " \
			
 
				+                                        "loading the correct weights.")
			
 
				+                            return
			
 
				+                        if map_res[key][0] > self.best_ap:
			
 
				+                            self.best_ap = map_res[key][0]
			
 
				+                            save_name = 'best_model'
			
 
				+                            weight = self.weight
			
 
				+                        logger.info("Best test {} ap is {:0.3f}.".format(
			
 
				+                            key, self.best_ap))
			
 
				+            if weight:
			
 
				+                save_model(weight, self.model.optimizer, self.save_dir,
			
 
				+                           save_name, epoch_id + 1)
			
 
				+
			
 
				+
			
 
				+class WiferFaceEval(Callback):
			
 
				+    def __init__(self, model):
			
 
				+        super(WiferFaceEval, self).__init__(model)
			
 
				+
			
 
				+    def on_epoch_begin(self, status):
			
 
				+        assert self.model.mode == 'eval', \
			
 
				+            "WiferFaceEval can only be set during evaluation"
			
 
				+        for metric in self.model._metrics:
			
 
				+            metric.update(self.model.model)
			
 
				+        sys.exit()
			
 
				+
			
 
				+
			
 
				+class VisualDLWriter(Callback):
			
 
				+    """
			
 
				+    Use VisualDL to log data or image
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, model):
			
 
				+        super(VisualDLWriter, self).__init__(model)
			
 
				+
			
 
				+        assert six.PY3, "VisualDL requires Python >= 3.5"
			
 
				+        try:
			
 
				+            from visualdl import LogWriter
			
 
				+        except Exception as e:
			
 
				+            logger.error('visualdl not found, plaese install visualdl. '
			
 
				+                         'for example: `pip install visualdl`.')
			
 
				+            raise e
			
 
				+        self.vdl_writer = LogWriter(
			
 
				+            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
			
 
				+        self.vdl_loss_step = 0
			
 
				+        self.vdl_mAP_step = 0
			
 
				+        self.vdl_image_step = 0
			
 
				+        self.vdl_image_frame = 0
			
 
				+
			
 
				+    def on_step_end(self, status):
			
 
				+        mode = status['mode']
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            if mode == 'train':
			
 
				+                training_staus = status['training_staus']
			
 
				+                for loss_name, loss_value in training_staus.get().items():
			
 
				+                    self.vdl_writer.add_scalar(loss_name, loss_value,
			
 
				+                                               self.vdl_loss_step)
			
 
				+                    self.vdl_loss_step += 1
			
 
				+            elif mode == 'test':
			
 
				+                ori_image = status['original_image']
			
 
				+                result_image = status['result_image']
			
 
				+                self.vdl_writer.add_image(
			
 
				+                    "original/frame_{}".format(self.vdl_image_frame),
			
 
				+                    ori_image, self.vdl_image_step)
			
 
				+                self.vdl_writer.add_image(
			
 
				+                    "result/frame_{}".format(self.vdl_image_frame),
			
 
				+                    result_image, self.vdl_image_step)
			
 
				+                self.vdl_image_step += 1
			
 
				+                # each frame can display ten pictures at most.
			
 
				+                if self.vdl_image_step % 10 == 0:
			
 
				+                    self.vdl_image_step = 0
			
 
				+                    self.vdl_image_frame += 1
			
 
				+
			
 
				+    def on_epoch_end(self, status):
			
 
				+        mode = status['mode']
			
 
				+        if dist.get_world_size() < 2 or dist.get_rank() == 0:
			
 
				+            if mode == 'eval':
			
 
				+                for metric in self.model._metrics:
			
 
				+                    for key, map_value in metric.get_results().items():
			
 
				+                        self.vdl_writer.add_scalar("{}-mAP".format(key),
			
 
				+                                                   map_value[0],
			
 
				+                                                   self.vdl_mAP_step)
			
 
				+                self.vdl_mAP_step += 1
			
 
				+
			
 
				+
			
 
				+class SniperProposalsGenerator(Callback):
			
 
				+    def __init__(self, model):
			
 
				+        super(SniperProposalsGenerator, self).__init__(model)
			
 
				+        ori_dataset = self.model.dataset
			
 
				+        self.dataset = self._create_new_dataset(ori_dataset)
			
 
				+        self.loader = self.model.loader
			
 
				+        self.cfg = self.model.cfg
			
 
				+        self.infer_model = self.model.model
			
 
				+
			
 
				+    def _create_new_dataset(self, ori_dataset):
			
 
				+        dataset = copy.deepcopy(ori_dataset)
			
 
				+        # init anno_cropper
			
 
				+        dataset.init_anno_cropper()
			
 
				+        # generate infer roidbs
			
 
				+        ori_roidbs = dataset.get_ori_roidbs()
			
 
				+        roidbs = dataset.anno_cropper.crop_infer_anno_records(ori_roidbs)
			
 
				+        # set new roidbs
			
 
				+        dataset.set_roidbs(roidbs)
			
 
				+
			
 
				+        return dataset
			
 
				+
			
 
				+    def _eval_with_loader(self, loader):
			
 
				+        results = []
			
 
				+        with paddle.no_grad():
			
 
				+            self.infer_model.eval()
			
 
				+            for step_id, data in enumerate(loader):
			
 
				+                outs = self.infer_model(data)
			
 
				+                for key in ['im_shape', 'scale_factor', 'im_id']:
			
 
				+                    outs[key] = data[key]
			
 
				+                for key, value in outs.items():
			
 
				+                    if hasattr(value, 'numpy'):
			
 
				+                        outs[key] = value.numpy()
			
 
				+
			
 
				+                results.append(outs)
			
 
				+
			
 
				+        return results
			
 
				+
			
 
				+    def on_train_end(self, status):
			
 
				+        self.loader.dataset = self.dataset
			
 
				+        results = self._eval_with_loader(self.loader)
			
 
				+        results = self.dataset.anno_cropper.aggregate_chips_detections(results)
			
 
				+        # sniper
			
 
				+        proposals = []
			
 
				+        clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()}
			
 
				+        for outs in results:
			
 
				+            batch_res = get_infer_results(outs, clsid2catid)
			
 
				+            start = 0
			
 
				+            for i, im_id in enumerate(outs['im_id']):
			
 
				+                bbox_num = outs['bbox_num']
			
 
				+                end = start + bbox_num[i]
			
 
				+                bbox_res = batch_res['bbox'][start:end] \
			
 
				+                    if 'bbox' in batch_res else None
			
 
				+                if bbox_res:
			
 
				+                    proposals += bbox_res
			
 
				+        logger.info("save proposals in {}".format(self.cfg.proposals_path))
			
 
				+        with open(self.cfg.proposals_path, 'w') as f:
			
 
				+            json.dump(proposals, f)
			
--- a/paddlers/models/ppdet/engine/env.py
+++ b/paddlers/models/ppdet/engine/env.py
@@ -0,0 +1,50 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import random
			
 
				+import numpy as np
			
 
				+
			
 
				+import paddle
			
 
				+from paddle.distributed import fleet
			
 
				+
			
 
				+__all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
			
 
				+
			
 
				+
			
 
				+def init_fleet_env(find_unused_parameters=False):
			
 
				+    strategy = fleet.DistributedStrategy()
			
 
				+    strategy.find_unused_parameters = find_unused_parameters
			
 
				+    fleet.init(is_collective=True, strategy=strategy)
			
 
				+
			
 
				+
			
 
				+def init_parallel_env():
			
 
				+    env = os.environ
			
 
				+    dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
			
 
				+    if dist:
			
 
				+        trainer_id = int(env['PADDLE_TRAINER_ID'])
			
 
				+        local_seed = (99 + trainer_id)
			
 
				+        random.seed(local_seed)
			
 
				+        np.random.seed(local_seed)
			
 
				+
			
 
				+    paddle.distributed.init_parallel_env()
			
 
				+
			
 
				+
			
 
				+def set_random_seed(seed):
			
 
				+    paddle.seed(seed)
			
 
				+    random.seed(seed)
			
 
				+    np.random.seed(seed)
			
--- a/paddlers/models/ppdet/engine/export_utils.py
+++ b/paddlers/models/ppdet/engine/export_utils.py
@@ -0,0 +1,177 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import yaml
			
 
				+from collections import OrderedDict
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.data.source.category import get_categories
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger('ppdet.engine')
			
 
				+
			
 
				+# Global dictionary
			
 
				+TRT_MIN_SUBGRAPH = {
			
 
				+    'YOLO': 3,
			
 
				+    'SSD': 60,
			
 
				+    'RCNN': 40,
			
 
				+    'RetinaNet': 40,
			
 
				+    'S2ANet': 80,
			
 
				+    'EfficientDet': 40,
			
 
				+    'Face': 3,
			
 
				+    'TTFNet': 60,
			
 
				+    'FCOS': 16,
			
 
				+    'SOLOv2': 60,
			
 
				+    'HigherHRNet': 3,
			
 
				+    'HRNet': 3,
			
 
				+    'DeepSORT': 3,
			
 
				+    'JDE': 10,
			
 
				+    'FairMOT': 5,
			
 
				+    'GFL': 16,
			
 
				+    'PicoDet': 3,
			
 
				+    'CenterNet': 5,
			
 
				+    'TOOD': 5,
			
 
				+}
			
 
				+
			
 
				+KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
			
 
				+MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
			
 
				+
			
 
				+
			
 
				+def _prune_input_spec(input_spec, program, targets):
			
 
				+    # try to prune static program to figure out pruned input spec
			
 
				+    # so we perform following operations in static mode
			
 
				+    paddle.enable_static()
			
 
				+    pruned_input_spec = [{}]
			
 
				+    program = program.clone()
			
 
				+    program = program._prune(targets=targets)
			
 
				+    global_block = program.global_block()
			
 
				+    for name, spec in input_spec[0].items():
			
 
				+        try:
			
 
				+            v = global_block.var(name)
			
 
				+            pruned_input_spec[0][name] = spec
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+    paddle.disable_static()
			
 
				+    return pruned_input_spec
			
 
				+
			
 
				+
			
 
				+def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
			
 
				+    preprocess_list = []
			
 
				+
			
 
				+    anno_file = dataset_cfg.get_anno()
			
 
				+
			
 
				+    clsid2catid, catid2name = get_categories(metric, anno_file, arch)
			
 
				+
			
 
				+    label_list = [str(cat) for cat in catid2name.values()]
			
 
				+
			
 
				+    fuse_normalize = reader_cfg.get('fuse_normalize', False)
			
 
				+    sample_transforms = reader_cfg['sample_transforms']
			
 
				+    for st in sample_transforms[1:]:
			
 
				+        for key, value in st.items():
			
 
				+            p = {'type': key}
			
 
				+            if key == 'Resize':
			
 
				+                if int(image_shape[1]) != -1:
			
 
				+                    value['target_size'] = image_shape[1:]
			
 
				+            if fuse_normalize and key == 'NormalizeImage':
			
 
				+                continue
			
 
				+            p.update(value)
			
 
				+            preprocess_list.append(p)
			
 
				+    batch_transforms = reader_cfg.get('batch_transforms', None)
			
 
				+    if batch_transforms:
			
 
				+        for bt in batch_transforms:
			
 
				+            for key, value in bt.items():
			
 
				+                # for deploy/infer, use PadStride(stride) instead PadBatch(pad_to_stride)
			
 
				+                if key == 'PadBatch':
			
 
				+                    preprocess_list.append({
			
 
				+                        'type': 'PadStride',
			
 
				+                        'stride': value['pad_to_stride']
			
 
				+                    })
			
 
				+                    break
			
 
				+
			
 
				+    return preprocess_list, label_list
			
 
				+
			
 
				+
			
 
				+def _parse_tracker(tracker_cfg):
			
 
				+    tracker_params = {}
			
 
				+    for k, v in tracker_cfg.items():
			
 
				+        tracker_params.update({k: v})
			
 
				+    return tracker_params
			
 
				+
			
 
				+
			
 
				+def _dump_infer_config(config, path, image_shape, model):
			
 
				+    arch_state = False
			
 
				+    from paddlers.models.ppdet.core.config.yaml_helpers import setup_orderdict
			
 
				+    setup_orderdict()
			
 
				+    use_dynamic_shape = True if image_shape[2] == -1 else False
			
 
				+    infer_cfg = OrderedDict({
			
 
				+        'mode': 'fluid',
			
 
				+        'draw_threshold': 0.5,
			
 
				+        'metric': config['metric'],
			
 
				+        'use_dynamic_shape': use_dynamic_shape
			
 
				+    })
			
 
				+    infer_arch = config['architecture']
			
 
				+
			
 
				+    if infer_arch in MOT_ARCH:
			
 
				+        if infer_arch == 'DeepSORT':
			
 
				+            tracker_cfg = config['DeepSORTTracker']
			
 
				+        else:
			
 
				+            tracker_cfg = config['JDETracker']
			
 
				+        infer_cfg['tracker'] = _parse_tracker(tracker_cfg)
			
 
				+
			
 
				+    for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
			
 
				+        if arch in infer_arch:
			
 
				+            infer_cfg['arch'] = arch
			
 
				+            infer_cfg['min_subgraph_size'] = min_subgraph_size
			
 
				+            arch_state = True
			
 
				+            break
			
 
				+    if not arch_state:
			
 
				+        logger.error(
			
 
				+            'Architecture: {} is not supported for exporting model now.\n'.
			
 
				+            format(infer_arch) +
			
 
				+            'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
			
 
				+        os._exit(0)
			
 
				+    if 'mask_head' in config[config['architecture']] and config[config[
			
 
				+            'architecture']]['mask_head']:
			
 
				+        infer_cfg['mask'] = True
			
 
				+    label_arch = 'detection_arch'
			
 
				+    if infer_arch in KEYPOINT_ARCH:
			
 
				+        label_arch = 'keypoint_arch'
			
 
				+
			
 
				+    if infer_arch in MOT_ARCH:
			
 
				+        label_arch = 'mot_arch'
			
 
				+        reader_cfg = config['TestMOTReader']
			
 
				+        dataset_cfg = config['TestMOTDataset']
			
 
				+    else:
			
 
				+        reader_cfg = config['TestReader']
			
 
				+        dataset_cfg = config['TestDataset']
			
 
				+
			
 
				+    infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
			
 
				+        reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
			
 
				+
			
 
				+    if infer_arch == 'PicoDet':
			
 
				+        infer_cfg['NMS'] = config['PicoHead']['nms']
			
 
				+        # In order to speed up the prediction, the threshold of nms
			
 
				+        # is adjusted here, which can be changed in infer_cfg.yml
			
 
				+        config['PicoHead']['nms']["score_threshold"] = 0.3
			
 
				+        config['PicoHead']['nms']["nms_threshold"] = 0.5
			
 
				+        infer_cfg['fpn_stride'] = config['PicoHead']['fpn_stride']
			
 
				+
			
 
				+    yaml.dump(infer_cfg, open(path, 'w'))
			
 
				+    logger.info("Export inference config file to {}".format(
			
 
				+        os.path.join(path)))
			
--- a/paddlers/models/ppdet/engine/tracker.py
+++ b/paddlers/models/ppdet/engine/tracker.py
@@ -0,0 +1,538 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import cv2
			
 
				+import glob
			
 
				+import re
			
 
				+import paddle
			
 
				+import numpy as np
			
 
				+import os.path as osp
			
 
				+from collections import defaultdict
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import create
			
 
				+from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
			
 
				+from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
			
 
				+from paddlers.models.ppdet.modeling.mot.utils import MOTTimer, load_det_results, write_mot_results, save_vis_results
			
 
				+
			
 
				+from paddlers.models.ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric
			
 
				+from paddlers.models.ppdet.metrics import MCMOTMetric
			
 
				+
			
 
				+from .callbacks import Callback, ComposeCallback
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = ['Tracker']
			
 
				+
			
 
				+
			
 
				+class Tracker(object):
			
 
    def __init__(self, cfg, mode='eval'):
        """Set up the dataset, model, callbacks and metrics for MOT.

        Args:
            cfg: Global config; indexed with ``'<Mode>MOTDataset'`` and read
                through its ``architecture`` attribute.
            mode (str): ``'test'`` or ``'eval'`` (case-insensitive).
        """
        self.cfg = cfg
        normalized_mode = mode.lower()
        assert normalized_mode in ['test', 'eval'], \
                "mode should be 'test' or 'eval'"
        self.mode = normalized_mode
        self.optimizer = None

        # build MOT data loader
        dataset_key = '{}MOTDataset'.format(self.mode.capitalize())
        self.dataset = cfg[dataset_key]

        # build model
        self.model = create(cfg.architecture)

        self.status = {}
        self.start_epoch = 0

        # default callbacks first, then default metrics (reset once created)
        self._init_callbacks()
        self._init_metrics()
        self._reset_metrics()
			
 
				+
			
 
				+    def _init_callbacks(self):
			
 
				+        self._callbacks = []
			
 
				+        self._compose_callback = None
			
 
				+
			
 
				+    def _init_metrics(self):
			
 
				+        if self.mode in ['test']:
			
 
				+            self._metrics = []
			
 
				+            return
			
 
				+
			
 
				+        if self.cfg.metric == 'MOT':
			
 
				+            self._metrics = [MOTMetric(), ]
			
 
				+        elif self.cfg.metric == 'MCMOT':
			
 
				+            self._metrics = [MCMOTMetric(self.cfg.num_classes), ]
			
 
				+        elif self.cfg.metric == 'KITTI':
			
 
				+            self._metrics = [KITTIMOTMetric(), ]
			
 
				+        else:
			
 
				+            logger.warning("Metric not support for metric type {}".format(
			
 
				+                self.cfg.metric))
			
 
				+            self._metrics = []
			
 
				+
			
 
				+    def _reset_metrics(self):
			
 
				+        for metric in self._metrics:
			
 
				+            metric.reset()
			
 
				+
			
 
    def register_callbacks(self, callbacks):
        """Append callbacks and rebuild the composed callback.

        ``None`` entries are ignored. The composed callback is rebuilt from
        the full list of callbacks registered so far.

        Args:
            callbacks: Iterable of ``Callback`` instances (``None`` allowed).

        Raises:
            AssertionError: If an entry is not a ``Callback`` instance.
        """
        callbacks = [h for h in list(callbacks) if h is not None]
        for c in callbacks:
            # The message used to talk about metrics (copy-paste slip).
            assert isinstance(c, Callback), \
                    "callbacks should be instances of subclass of Callback"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)
			
 
				+
			
 
				+    def register_metrics(self, metrics):
			
 
				+        metrics = [m for m in list(metrics) if m is not None]
			
 
				+        for m in metrics:
			
 
				+            assert isinstance(m, Metric), \
			
 
				+                    "metrics shoule be instances of subclass of Metric"
			
 
				+        self._metrics.extend(metrics)
			
 
				+
			
 
    def load_weights_jde(self, weights):
        """Load ``weights`` into the whole model via ``load_weight``.

        The tracker's optimizer attribute is passed along as the third
        argument.
        """
        model, optimizer = self.model, self.optimizer
        load_weight(model, weights, optimizer)
			
 
				+
			
 
    def load_weights_sde(self, det_weights, reid_weights):
        """Load detector and ReID weights for a separated detect/embed model.

        Without a detector only the ReID weights are loaded (with the
        optimizer passed along); with a detector both parts are loaded and
        no optimizer is passed.

        Args:
            det_weights: Weights for ``self.model.detector``; only used when
                the model has a detector.
            reid_weights: Weights for ``self.model.reid``.
        """
        detector = self.model.detector
        if not detector:
            load_weight(self.model.reid, reid_weights, self.optimizer)
            return
        load_weight(detector, det_weights)
        load_weight(self.model.reid, reid_weights)
			
 
				+
			
 
    def _eval_seq_jde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      draw_threshold=0):
        """Track one sequence with a joint detection/embedding model.

        Args:
            dataloader: Iterable yielding one batch per frame.
            save_dir (str|None): Directory handed to ``save_vis_results``;
                created when missing.
            show_image (bool): Forwarded to ``save_vis_results``.
            frame_rate (int): Scales the tracker's ``max_time_lost`` relative
                to 30 fps.
            draw_threshold (float): Tracks whose score is below this value
                are left out of the results. This is the same filter
                ``_eval_seq_sde`` applies; the parameter was previously
                accepted but ignored here. With the default of 0, tracks
                with non-negative scores are unaffected.

        Returns:
            tuple: ``(results, frame_count, timer.average_time, timer.calls)``
            where ``results`` maps a class id to a list of
            ``(frame_id + 1, tlwhs, scores, ids)`` tuples.
        """
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        tracker = self.model.tracker
        tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)

        timer = MOTTimer()
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        results = defaultdict(list)  # support single class and multi classes

        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))
            # forward
            timer.tic()
            pred_dets, pred_embs = self.model(data)

            pred_dets, pred_embs = pred_dets.numpy(), pred_embs.numpy()
            online_targets_dict = self.model.tracker.update(pred_dets,
                                                            pred_embs)
            online_tlwhs = defaultdict(list)
            online_scores = defaultdict(list)
            online_ids = defaultdict(list)
            for cls_id in range(self.cfg.num_classes):
                online_targets = online_targets_dict[cls_id]
                for t in online_targets:
                    tlwh = t.tlwh
                    tid = t.track_id
                    tscore = t.score
                    # Honour the caller's score threshold.
                    if tscore < draw_threshold: continue
                    if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
                    if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                            3] > tracker.vertical_ratio:
                        continue
                    online_tlwhs[cls_id].append(tlwh)
                    online_ids[cls_id].append(tid)
                    online_scores[cls_id].append(tscore)
                # save results
                results[cls_id].append(
                    (frame_id + 1, online_tlwhs[cls_id], online_scores[cls_id],
                     online_ids[cls_id]))

            timer.toc()
            save_vis_results(data, frame_id, online_ids, online_tlwhs,
                             online_scores, timer.average_time, show_image,
                             save_dir, self.cfg.num_classes)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls
			
 
				+
			
 
    def _eval_seq_sde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      seq_name='',
                      scaled=False,
                      det_file='',
                      draw_threshold=0):
        """Track one sequence with a separate detector and ReID model.

        Detections come either from ``self.model.detector`` or, when the
        model has no detector, from the pre-computed results in ``det_file``.
        Frames without usable detections are visualised without tracks and
        skipped (no ReID inference, nothing appended to the results).

        Args:
            dataloader: Iterable yielding one batch per frame.
            save_dir (str|None): Directory handed to ``save_vis_results``;
                created when missing.
            show_image (bool): Forwarded to ``save_vis_results``.
            frame_rate (int): Not used by this method.
            seq_name (str): Not used by this method.
            scaled (bool): When False, detector boxes are mapped through
                ``scale_coords``; when True they are used as returned.
            det_file (str): Detection result file, read only when the model
                has no detector.
            draw_threshold (float): Tracks scoring below this are dropped.

        Returns:
            tuple: ``(results, frame_count, timer.average_time, timer.calls)``
            with all tracks stored under key ``0`` of ``results``.
        """
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        use_detector = bool(self.model.detector)

        timer = MOTTimer()
        results = defaultdict(list)
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        self.model.reid.eval()
        if not use_detector:
            dets_list = load_det_results(det_file, len(dataloader))
            logger.info('Finish loading detection results file {}.'.format(
                det_file))

        for step_id, data in enumerate(dataloader):
            self.status['step_id'] = step_id
            if frame_id % 40 == 0:
                logger.info('Processing frame {} ({:.2f} fps)'.format(
                    frame_id, 1. / max(1e-5, timer.average_time)))

            ori_image = data['ori_image']  # [bs, H, W, 3]
            # ori_image_shape: [H, W]
            ori_image_shape = data['ori_image'].shape[1:3]

            # input_shape: [h, w], before data transforms, set in model config
            input_shape = data['image'].shape[2:]

            # im_shape: [new_h, new_w], after data transforms
            im_shape = data['im_shape'][0].numpy()
            scale_factor = data['scale_factor'][0].numpy()

            # when a frame has no detected bboxes the reid model is not run
            # and, if visualizing, the original image is used instead
            empty_detections = False

            # forward
            timer.tic()
            if use_detector:
                outs = self.model.detector(data)
                outs['bbox'] = outs['bbox'].numpy()
                outs['bbox_num'] = outs['bbox_num'].numpy()

                # empty_detections is still False at this point
                if outs['bbox_num'] > 0:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    pred_cls_ids = outs['bbox'][:, 0:1]
                    pred_scores = outs['bbox'][:, 1:2]
                    if scaled:
                        pred_bboxes = outs['bbox'][:, 2:]
                    else:
                        # Note: scaled=False only in JDE YOLOv3 or other detectors
                        # with LetterBoxResize and JDEBBoxPostProcess.
                        #
                        # 'scaled' means whether the coords after detector outputs
                        # have been scaled back to the original image, set True
                        # in general detector, set False in JDE YOLOv3.
                        pred_bboxes = scale_coords(outs['bbox'][:, 2:],
                                                   input_shape, im_shape,
                                                   scale_factor)
                else:
                    logger.warning(
                        'Frame {} has not detected object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True
            else:
                dets = dets_list[frame_id]
                bbox_tlwh = np.array(dets['bbox'], dtype='float32')
                if bbox_tlwh.shape[0] > 0:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    pred_cls_ids = np.array(dets['cls_id'], dtype='float32')
                    pred_scores = np.array(dets['score'], dtype='float32')
                    # tlwh -> xyxy
                    top_left = bbox_tlwh[:, 0:2]
                    pred_bboxes = np.concatenate(
                        (top_left, bbox_tlwh[:, 2:4] + top_left), axis=1)
                else:
                    logger.warning(
                        'Frame {} has not object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True

            if not empty_detections:
                pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
                if len(keep_idx[0]) == 0:
                    logger.warning(
                        'Frame {} has not detected object left after clip_box.'.
                        format(frame_id))
                    empty_detections = True

            if empty_detections:
                timer.toc()
                # if visualize, use original image instead
                online_ids, online_tlwhs, online_scores = None, None, None
                save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                 online_scores, timer.average_time, show_image,
                                 save_dir, self.cfg.num_classes)
                frame_id += 1
                # thus will not inference reid model
                continue

            kept = keep_idx[0]
            pred_scores = pred_scores[kept]
            pred_cls_ids = pred_cls_ids[kept]
            # xyxy -> tlwh
            pred_tlwhs = np.concatenate(
                (pred_xyxys[:, 0:2],
                 pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
                axis=1)
            pred_dets = np.concatenate(
                (pred_tlwhs, pred_scores, pred_cls_ids), axis=1)

            tracker = self.model.tracker
            crops = get_crops(
                pred_xyxys,
                ori_image,
                w=tracker.input_size[0],
                h=tracker.input_size[1])
            crops = paddle.to_tensor(crops)

            data.update({'crops': crops})
            pred_embs = self.model(data).numpy()

            tracker.predict()
            online_targets = tracker.update(pred_dets, pred_embs)

            online_tlwhs, online_scores, online_ids = [], [], []
            for t in online_targets:
                if not t.is_confirmed() or t.time_since_update > 1:
                    continue
                tlwh = t.to_tlwh()
                tscore = t.score
                tid = t.track_id
                if tscore < draw_threshold:
                    continue
                if tlwh[2] * tlwh[3] <= tracker.min_box_area:
                    continue
                if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                        3] > tracker.vertical_ratio:
                    continue
                online_tlwhs.append(tlwh)
                online_scores.append(tscore)
                online_ids.append(tid)
            timer.toc()

            # save results
            results[0].append(
                (frame_id + 1, online_tlwhs, online_scores, online_ids))
            save_vis_results(data, frame_id, online_ids, online_tlwhs,
                             online_scores, timer.average_time, show_image,
                             save_dir, self.cfg.num_classes)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls
			
 
				+
			
 
    def mot_evaluate(self,
                     data_root,
                     seqs,
                     output_dir,
                     data_type='mot',
                     model_type='JDE',
                     save_images=False,
                     save_videos=False,
                     show_image=False,
                     scaled=False,
                     det_results_dir=''):
        """Run tracking on every sequence and update the registered metrics.

        Args:
            data_root (str): Directory containing one sub-directory per
                sequence.
            seqs: Iterable of sequence names.
            output_dir (str): Root for ``mot_results`` (result text files)
                and, when saving images/videos, ``mot_outputs``.
            data_type (str): ``'mot'``, ``'mcmot'`` or ``'kitti'``.
            model_type (str): ``'JDE'``, ``'DeepSORT'`` or ``'FairMOT'``.
            save_images (bool): Keep per-frame visualisations.
            save_videos (bool): Additionally encode them with ``ffmpeg``.
            show_image (bool): Forwarded to the per-sequence evaluators.
            scaled (bool): Forwarded to ``_eval_seq_sde``.
            det_results_dir (str): Directory with ``<seq>.txt`` detection
                files, used by ``_eval_seq_sde``.

        Raises:
            AssertionError: On an unsupported ``data_type``/``model_type``.
        """
        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'mcmot', 'kitti'], \
            "data_type should be 'mot', 'mcmot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        n_frame = 0
        timer_avgs, timer_calls = [], []
        for seq in seqs:
            infer_dir = os.path.join(data_root, seq)
            if not os.path.exists(infer_dir) or not os.path.isdir(infer_dir):
                logger.warning("Seq {} error, {} has no images.".format(
                    seq, infer_dir))
                continue
            if os.path.exists(os.path.join(infer_dir, 'img1')):
                infer_dir = os.path.join(infer_dir, 'img1')

            frame_rate = 30
            seqinfo = os.path.join(data_root, seq, 'seqinfo.ini')
            if os.path.exists(seqinfo):
                # Read through a context manager so the handle is closed
                # right away instead of leaking one per sequence.
                with open(seqinfo) as seqinfo_file:
                    meta_info = seqinfo_file.read()
                # The frame rate is the text between 'frameRate=' and the
                # line that starts with 'seqLength'.
                frame_rate = int(meta_info[meta_info.find('frameRate') + 10:
                                           meta_info.find('\nseqLength')])

            save_dir = os.path.join(
                output_dir, 'mot_outputs',
                seq) if save_images or save_videos else None
            logger.info('start seq: {}'.format(seq))

            self.dataset.set_images(self.get_infer_images(infer_dir))
            dataloader = create('EvalMOTReader')(self.dataset, 0)

            result_filename = os.path.join(result_root, '{}.txt'.format(seq))

            with paddle.no_grad():
                if model_type in ['JDE', 'FairMOT']:
                    results, nf, ta, tc = self._eval_seq_jde(
                        dataloader,
                        save_dir=save_dir,
                        show_image=show_image,
                        frame_rate=frame_rate)
                elif model_type in ['DeepSORT']:
                    results, nf, ta, tc = self._eval_seq_sde(
                        dataloader,
                        save_dir=save_dir,
                        show_image=show_image,
                        frame_rate=frame_rate,
                        seq_name=seq,
                        scaled=scaled,
                        det_file=os.path.join(det_results_dir,
                                              '{}.txt'.format(seq)))
                else:
                    raise ValueError(model_type)

            write_mot_results(result_filename, results, data_type,
                              self.cfg.num_classes)
            n_frame += nf
            timer_avgs.append(ta)
            timer_calls.append(tc)

            if save_videos:
                output_video_path = os.path.join(save_dir, '..',
                                                 '{}_vis.mp4'.format(seq))
                # NOTE(review): the command is built by string formatting and
                # run through the shell; paths with spaces or shell
                # metacharacters will break it.
                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                    save_dir, output_video_path)
                os.system(cmd_str)
                logger.info('Save video in {}.'.format(output_video_path))

            logger.info('Evaluate seq: {}'.format(seq))
            # update metrics
            for metric in self._metrics:
                metric.update(data_root, seq, data_type, result_root,
                              result_filename)

        # call-weighted total and average time over all sequences
        timer_avgs = np.asarray(timer_avgs)
        timer_calls = np.asarray(timer_calls)
        all_time = np.dot(timer_avgs, timer_calls)
        avg_time = all_time / np.sum(timer_calls)
        logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(
            all_time, 1.0 / avg_time))

        # accumulate metric to log out
        for metric in self._metrics:
            metric.accumulate()
            metric.log()
        # reset metric states for metric may performed multiple times
        self._reset_metrics()
			
 
				+
			
 
				+    def get_infer_images(self, infer_dir):
			
 
				+        assert infer_dir is None or os.path.isdir(infer_dir), \
			
 
				+            "{} is not a directory".format(infer_dir)
			
 
				+        images = set()
			
 
				+        assert os.path.isdir(infer_dir), \
			
 
				+            "infer_dir {} is not a directory".format(infer_dir)
			
 
				+        exts = ['jpg', 'jpeg', 'png', 'bmp']
			
 
				+        exts += [ext.upper() for ext in exts]
			
 
				+        for ext in exts:
			
 
				+            images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
			
 
				+        images = list(images)
			
 
				+        images.sort()
			
 
				+        assert len(images) > 0, "no image found in {}".format(infer_dir)
			
 
				+        logger.info("Found {} inference images in total.".format(len(images)))
			
 
				+        return images
			
 
				+
			
 
    def mot_predict_seq(self,
                        video_file,
                        frame_rate,
                        image_dir,
                        output_dir,
                        data_type='mot',
                        model_type='JDE',
                        save_images=False,
                        save_videos=True,
                        show_image=False,
                        scaled=False,
                        det_results_dir='',
                        draw_threshold=0.5):
        """Run tracking on one video file or one image folder and save results.

        Results are written with `write_mot_results` to
        `<output_dir>/mot_results/<seq>.txt`, where `seq` is the base name of
        the input (text before the first dot). When `save_videos` is true,
        ffmpeg is invoked to assemble `<save_dir>/%05d.jpg` frames into
        `<output_dir>/mot_outputs/<seq>_vis.mp4`.

        Args:
            video_file (str|None): Path of the input video; takes precedence
                over `image_dir` when both are given.
            frame_rate (int): Frame rate passed to the dataset / evaluation
                routine; -1 means "use `self.dataset.frame_rate`".
            image_dir (str|None): Folder of frames; an `img1` sub-folder is
                used instead when it exists.
            output_dir (str): Root directory for all outputs (created if
                missing).
            data_type (str): One of 'mot', 'mcmot', 'kitti'.
            model_type (str): One of 'JDE', 'DeepSORT', 'FairMOT'.
            save_images (bool): With `save_videos`, decides whether a
                `save_dir` is passed to the evaluation routine.
            save_videos (bool): Whether to run ffmpeg on the saved frames.
            show_image (bool): Forwarded to the evaluation routine.
            scaled (bool): Forwarded to `_eval_seq_sde` (DeepSORT only).
            det_results_dir (str): Directory holding `<seq>.txt`, forwarded
                as `det_file` to `_eval_seq_sde` (DeepSORT only).
            draw_threshold (float): Forwarded to the evaluation routine.

        Raises:
            AssertionError: On invalid `video_file`, `image_dir`, `data_type`
                or `model_type`.
            ValueError: If neither input is usable or `model_type` is
                unsupported.
        """
        # Local import: only this method needs it.
        import subprocess

        assert video_file is not None or image_dir is not None, \
            "--video_file or --image_dir should be set."
        assert video_file is None or os.path.isfile(video_file), \
                "{} is not a file".format(video_file)
        assert image_dir is None or os.path.isdir(image_dir), \
                "{} is not a directory".format(image_dir)

        if not os.path.exists(output_dir): os.makedirs(output_dir)
        result_root = os.path.join(output_dir, 'mot_results')
        if not os.path.exists(result_root): os.makedirs(result_root)
        assert data_type in ['mot', 'mcmot', 'kitti'], \
            "data_type should be 'mot', 'mcmot' or 'kitti'"
        assert model_type in ['JDE', 'DeepSORT', 'FairMOT'], \
            "model_type should be 'JDE', 'DeepSORT' or 'FairMOT'"

        # run tracking
        if video_file:
            # normpath + basename keeps the sequence name correct regardless
            # of trailing separators.
            seq = os.path.basename(os.path.normpath(video_file)).split('.')[0]
            self.dataset.set_video(video_file, frame_rate)
            logger.info('Starting tracking video {}'.format(video_file))
        elif image_dir:
            # A trailing '/' on image_dir used to produce an empty name here.
            seq = os.path.basename(os.path.normpath(image_dir)).split('.')[0]
            if os.path.exists(os.path.join(image_dir, 'img1')):
                image_dir = os.path.join(image_dir, 'img1')
            images = [
                '{}/{}'.format(image_dir, x) for x in os.listdir(image_dir)
            ]
            images.sort()
            self.dataset.set_images(images)
            logger.info('Starting tracking folder {}, found {} images'.format(
                image_dir, len(images)))
        else:
            raise ValueError('--video_file or --image_dir should be set.')

        save_dir = os.path.join(output_dir, 'mot_outputs',
                                seq) if save_images or save_videos else None

        dataloader = create('TestMOTReader')(self.dataset, 0)
        result_filename = os.path.join(result_root, '{}.txt'.format(seq))
        if frame_rate == -1:
            frame_rate = self.dataset.frame_rate

        with paddle.no_grad():
            if model_type in ['JDE', 'FairMOT']:
                # Only the tracking results are used; the three remaining
                # return values are ignored.
                results, _, _, _ = self._eval_seq_jde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    draw_threshold=draw_threshold)
            elif model_type in ['DeepSORT']:
                results, _, _, _ = self._eval_seq_sde(
                    dataloader,
                    save_dir=save_dir,
                    show_image=show_image,
                    frame_rate=frame_rate,
                    seq_name=seq,
                    scaled=scaled,
                    det_file=os.path.join(det_results_dir,
                                          '{}.txt'.format(seq)),
                    draw_threshold=draw_threshold)
            else:
                raise ValueError(model_type)

        if save_videos:
            output_video_path = os.path.join(save_dir, '..',
                                             '{}_vis.mp4'.format(seq))
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed to ffmpeg verbatim.
            cmd = [
                'ffmpeg', '-f', 'image2', '-i',
                '{}/%05d.jpg'.format(save_dir), output_video_path
            ]
            try:
                returncode = subprocess.run(cmd).returncode
            except OSError as e:
                # e.g. ffmpeg is not installed; video export stays
                # best-effort and must not abort result writing.
                logger.warning('Failed to run ffmpeg: {}'.format(e))
            else:
                if returncode == 0:
                    logger.info('Save video in {}'.format(output_video_path))
                else:
                    logger.warning('ffmpeg exited with code {}'.format(
                        returncode))

        write_mot_results(result_filename, results, data_type,
                          self.cfg.num_classes)
			
--- a/paddlers/models/ppdet/engine/trainer.py
+++ b/paddlers/models/ppdet/engine/trainer.py
@@ -0,0 +1,742 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import copy
			
 
				+import time
			
 
				+
			
 
				+import numpy as np
			
 
				+import typing
			
 
				+from PIL import Image, ImageOps
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.distributed as dist
			
 
				+from paddle.distributed import fleet
			
 
				+from paddle import amp
			
 
				+from paddle.static import InputSpec
			
 
				+from paddlers.models.ppdet.optimizer import ModelEMA
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import create
			
 
				+from paddlers.models.ppdet.modeling.architectures.meta_arch import BaseArch
			
 
				+from paddlers.models.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
			
 
				+from paddlers.models.ppdet.utils.visualizer import visualize_results, save_result
			
 
				+from paddlers.models.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
			
 
				+from paddlers.models.ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
			
 
				+from paddlers.models.ppdet.data.source.sniper_coco import SniperCOCODataSet
			
 
				+from paddlers.models.ppdet.data.source.category import get_categories
			
 
				+from paddlers.models.ppdet.utils import stats
			
 
				+from paddlers.models.ppdet.utils import profiler
			
 
				+
			
 
				+from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter, SniperProposalsGenerator
			
 
				+from .export_utils import _dump_infer_config, _prune_input_spec
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
# Module-level logger obtained from the ppdet logging helper.
logger = setup_logger('ppdet.engine')

# Only `Trainer` is exported by `from ... import *`.
__all__ = ['Trainer']

# Architecture names for which Trainer reads the '{Mode}MOTDataset' config
# entry (instead of '{Mode}Dataset') in 'eval' and 'test' modes.
MOT_ARCH = ['DeepSORT', 'JDE', 'FairMOT']
			
 
				+
			
 
				+
			
 
				+class Trainer(object):
			
 
    def __init__(self, cfg, mode='train'):
        """Build the dataset, model and the mode-dependent loader/optimizer.

        Args:
            cfg: Configuration mapping, read through attribute access
                (e.g. `cfg.architecture`), item access (e.g.
                `cfg['TestReader']`) and `cfg.get(...)`. If it contains a
                'model' entry, that object is used as the model and
                `load_weights` becomes a no-op.
            mode (str): 'train', 'eval' or 'test' (case-insensitive).

        Notes:
            * A data loader is created here only in 'train' and 'eval' modes;
              in 'test' mode it is created later, once images are set.
            * The optimizer, LR scheduler and (optional) unstructured pruner
              are created only in 'train' mode.
            * The process exits with status 1 when asked to train DeepSORT.
        """
        self.cfg = cfg
        assert mode.lower() in ['train', 'eval', 'test'], \
                "mode should be 'train', 'eval' or 'test'"
        self.mode = mode.lower()
        self.optimizer = None
        self.is_loaded_weights = False

        # build data loader
        if cfg.architecture in MOT_ARCH and self.mode in ['eval', 'test']:
            self.dataset = cfg['{}MOTDataset'.format(self.mode.capitalize())]
        else:
            self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]

        if cfg.architecture == 'DeepSORT' and self.mode == 'train':
            logger.error('DeepSORT has no need of training on mot dataset.')
            sys.exit(1)

        if self.mode == 'train':
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num)

        if cfg.architecture == 'JDE' and self.mode == 'train':
            cfg['JDEEmbeddingHead'][
                'num_identities'] = self.dataset.num_identities_dict[0]
            # JDE only support single class MOT now.

        if cfg.architecture == 'FairMOT' and self.mode == 'train':
            cfg['FairMOTEmbeddingHead'][
                'num_identities_dict'] = self.dataset.num_identities_dict
            # FairMOT support single class and multi-class MOT now.

        # build model
        if 'model' not in self.cfg:
            self.model = create(cfg.architecture)
        else:
            # A ready-made model was supplied through the config; mark it as
            # loaded so that load_weights() does not overwrite it.
            self.model = self.cfg.model
            self.is_loaded_weights = True

        #normalize params for deploy
        self.model.load_meanstd(cfg['TestReader']['sample_transforms'])

        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
        if self.use_ema:
            ema_decay = self.cfg.get('ema_decay', 0.9998)
            cycle_epoch = self.cfg.get('cycle_epoch', -1)
            self.ema = ModelEMA(
                self.model,
                decay=ema_decay,
                use_thres_step=True,
                cycle_epoch=cycle_epoch)

        # EvalDataset build with BatchSampler to evaluate in single device
        # TODO: multi-device evaluate
        if self.mode == 'eval':
            self._eval_batch_sampler = paddle.io.BatchSampler(
                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
            reader_name = '{}Reader'.format(self.mode.capitalize())
            # If metric is VOC, need to be set collate_batch=False.
            if cfg.metric == 'VOC':
                cfg[reader_name]['collate_batch'] = False
            self.loader = create(reader_name)(self.dataset, cfg.worker_num,
                                              self._eval_batch_sampler)
        # TestDataset build after user set images, skip loader creation here

        # build optimizer in train mode
        if self.mode == 'train':
            steps_per_epoch = len(self.loader)
            self.lr = create('LearningRate')(steps_per_epoch)
            self.optimizer = create('OptimizerBuilder')(self.lr, self.model)

            # The pruner needs steps_per_epoch, which only exists in train
            # mode; creating it outside this branch raised a NameError in
            # eval/test mode whenever 'unstructured_prune' was configured.
            if self.cfg.get('unstructured_prune'):
                self.pruner = create('UnstructuredPruner')(self.model,
                                                           steps_per_epoch)

        self._nranks = dist.get_world_size()
        self._local_rank = dist.get_rank()

        self.status = {}

        self.start_epoch = 0
        self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch

        # initial default callbacks
        self._init_callbacks()

        # initial default metrics
        self._init_metrics()
        self._reset_metrics()
			
 
				+
			
 
				+    def _init_callbacks(self):
			
 
				+        if self.mode == 'train':
			
 
				+            self._callbacks = [LogPrinter(self), Checkpointer(self)]
			
 
				+            if self.cfg.get('use_vdl', False):
			
 
				+                self._callbacks.append(VisualDLWriter(self))
			
 
				+            if self.cfg.get('save_proposals', False):
			
 
				+                self._callbacks.append(SniperProposalsGenerator(self))
			
 
				+            self._compose_callback = ComposeCallback(self._callbacks)
			
 
				+        elif self.mode == 'eval':
			
 
				+            self._callbacks = [LogPrinter(self)]
			
 
				+            if self.cfg.metric == 'WiderFace':
			
 
				+                self._callbacks.append(WiferFaceEval(self))
			
 
				+            self._compose_callback = ComposeCallback(self._callbacks)
			
 
				+        elif self.mode == 'test' and self.cfg.get('use_vdl', False):
			
 
				+            self._callbacks = [VisualDLWriter(self)]
			
 
				+            self._compose_callback = ComposeCallback(self._callbacks)
			
 
				+        else:
			
 
				+            self._callbacks = []
			
 
				+            self._compose_callback = None
			
 
				+
			
 
				+    def _init_metrics(self, validate=False):
			
 
				+        if self.mode == 'test' or (self.mode == 'train' and not validate):
			
 
				+            self._metrics = []
			
 
				+            return
			
 
				+        classwise = self.cfg['classwise'] if 'classwise' in self.cfg else False
			
 
				+        if self.cfg.metric == 'COCO' or self.cfg.metric == "SNIPERCOCO":
			
 
				+            # TODO: bias should be unified
			
 
				+            bias = self.cfg['bias'] if 'bias' in self.cfg else 0
			
 
				+            output_eval = self.cfg['output_eval'] \
			
 
				+                if 'output_eval' in self.cfg else None
			
 
				+            save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				+
			
 
				+            # pass clsid2catid info to metric instance to avoid multiple loading
			
 
				+            # annotation file
			
 
				+            clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
			
 
				+                                if self.mode == 'eval' else None
			
 
				+
			
 
				+            # when do validation in train, annotation file should be get from
			
 
				+            # EvalReader instead of self.dataset(which is TrainReader)
			
 
				+            anno_file = self.dataset.get_anno()
			
 
				+            dataset = self.dataset
			
 
				+            if self.mode == 'train' and validate:
			
 
				+                eval_dataset = self.cfg['EvalDataset']
			
 
				+                eval_dataset.check_or_download_dataset()
			
 
				+                anno_file = eval_dataset.get_anno()
			
 
				+                dataset = eval_dataset
			
 
				+
			
 
				+            IouType = self.cfg['IouType'] if 'IouType' in self.cfg else 'bbox'
			
 
				+            if self.cfg.metric == "COCO":
			
 
				+                self._metrics = [
			
 
				+                    COCOMetric(
			
 
				+                        anno_file=anno_file,
			
 
				+                        clsid2catid=clsid2catid,
			
 
				+                        classwise=classwise,
			
 
				+                        output_eval=output_eval,
			
 
				+                        bias=bias,
			
 
				+                        IouType=IouType,
			
 
				+                        save_prediction_only=save_prediction_only)
			
 
				+                ]
			
 
				+            elif self.cfg.metric == "SNIPERCOCO":  # sniper
			
 
				+                self._metrics = [
			
 
				+                    SNIPERCOCOMetric(
			
 
				+                        anno_file=anno_file,
			
 
				+                        dataset=dataset,
			
 
				+                        clsid2catid=clsid2catid,
			
 
				+                        classwise=classwise,
			
 
				+                        output_eval=output_eval,
			
 
				+                        bias=bias,
			
 
				+                        IouType=IouType,
			
 
				+                        save_prediction_only=save_prediction_only)
			
 
				+                ]
			
 
				+        elif self.cfg.metric == 'RBOX':
			
 
				+            # TODO: bias should be unified
			
 
				+            bias = self.cfg['bias'] if 'bias' in self.cfg else 0
			
 
				+            output_eval = self.cfg['output_eval'] \
			
 
				+                if 'output_eval' in self.cfg else None
			
 
				+            save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				+
			
 
				+            # pass clsid2catid info to metric instance to avoid multiple loading
			
 
				+            # annotation file
			
 
				+            clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()} \
			
 
				+                                if self.mode == 'eval' else None
			
 
				+
			
 
				+            # when do validation in train, annotation file should be get from
			
 
				+            # EvalReader instead of self.dataset(which is TrainReader)
			
 
				+            anno_file = self.dataset.get_anno()
			
 
				+            if self.mode == 'train' and validate:
			
 
				+                eval_dataset = self.cfg['EvalDataset']
			
 
				+                eval_dataset.check_or_download_dataset()
			
 
				+                anno_file = eval_dataset.get_anno()
			
 
				+
			
 
				+            self._metrics = [
			
 
				+                RBoxMetric(
			
 
				+                    anno_file=anno_file,
			
 
				+                    clsid2catid=clsid2catid,
			
 
				+                    classwise=classwise,
			
 
				+                    output_eval=output_eval,
			
 
				+                    bias=bias,
			
 
				+                    save_prediction_only=save_prediction_only)
			
 
				+            ]
			
 
				+        elif self.cfg.metric == 'VOC':
			
 
				+            self._metrics = [
			
 
				+                VOCMetric(
			
 
				+                    label_list=self.dataset.get_label_list(),
			
 
				+                    class_num=self.cfg.num_classes,
			
 
				+                    map_type=self.cfg.map_type,
			
 
				+                    classwise=classwise)
			
 
				+            ]
			
 
				+        elif self.cfg.metric == 'WiderFace':
			
 
				+            multi_scale = self.cfg.multi_scale_eval if 'multi_scale_eval' in self.cfg else True
			
 
				+            self._metrics = [
			
 
				+                WiderFaceMetric(
			
 
				+                    image_dir=os.path.join(self.dataset.dataset_dir,
			
 
				+                                           self.dataset.image_dir),
			
 
				+                    anno_file=self.dataset.get_anno(),
			
 
				+                    multi_scale=multi_scale)
			
 
				+            ]
			
 
				+        elif self.cfg.metric == 'KeyPointTopDownCOCOEval':
			
 
				+            eval_dataset = self.cfg['EvalDataset']
			
 
				+            eval_dataset.check_or_download_dataset()
			
 
				+            anno_file = eval_dataset.get_anno()
			
 
				+            save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				+            self._metrics = [
			
 
				+                KeyPointTopDownCOCOEval(
			
 
				+                    anno_file,
			
 
				+                    len(eval_dataset),
			
 
				+                    self.cfg.num_joints,
			
 
				+                    self.cfg.save_dir,
			
 
				+                    save_prediction_only=save_prediction_only)
			
 
				+            ]
			
 
				+        elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
			
 
				+            eval_dataset = self.cfg['EvalDataset']
			
 
				+            eval_dataset.check_or_download_dataset()
			
 
				+            anno_file = eval_dataset.get_anno()
			
 
				+            save_prediction_only = self.cfg.get('save_prediction_only', False)
			
 
				+            self._metrics = [
			
 
				+                KeyPointTopDownMPIIEval(
			
 
				+                    anno_file,
			
 
				+                    len(eval_dataset),
			
 
				+                    self.cfg.num_joints,
			
 
				+                    self.cfg.save_dir,
			
 
				+                    save_prediction_only=save_prediction_only)
			
 
				+            ]
			
 
				+        elif self.cfg.metric == 'MOTDet':
			
 
				+            self._metrics = [JDEDetMetric(), ]
			
 
				+        else:
			
 
				+            logger.warning("Metric not support for metric type {}".format(
			
 
				+                self.cfg.metric))
			
 
				+            self._metrics = []
			
 
				+
			
 
				+    def _reset_metrics(self):
			
 
				+        for metric in self._metrics:
			
 
				+            metric.reset()
			
 
				+
			
 
    def register_callbacks(self, callbacks):
        """Append extra callbacks and rebuild the composed callback.

        Args:
            callbacks: Iterable of `Callback` instances; `None` entries are
                ignored.

        Raises:
            AssertionError: If an entry is not a `Callback` instance. Nothing
                is registered in that case.
        """
        callbacks = [c for c in list(callbacks) if c is not None]
        for c in callbacks:
            # The message previously referred to metrics/Metric (copied from
            # register_metrics), which was misleading for this check.
            assert isinstance(c, Callback), \
                    "callbacks should be instances of subclass of Callback"
        self._callbacks.extend(callbacks)
        self._compose_callback = ComposeCallback(self._callbacks)
			
 
				+
			
 
				+    def register_metrics(self, metrics):
			
 
				+        metrics = [m for m in list(metrics) if m is not None]
			
 
				+        for m in metrics:
			
 
				+            assert isinstance(m, Metric), \
			
 
				+                    "metrics shoule be instances of subclass of Metric"
			
 
				+        self._metrics.extend(metrics)
			
 
				+
			
 
				+    def load_weights(self, weights):
			
 
				+        if self.is_loaded_weights:
			
 
				+            return
			
 
				+        self.start_epoch = 0
			
 
				+        load_pretrain_weight(self.model, weights)
			
 
				+        logger.debug("Load weights {} to start training".format(weights))
			
 
				+
			
 
    def load_weights_sde(self, det_weights, reid_weights):
        """Load weights for the detector (if any) and the ReID sub-model.

        Args:
            det_weights: Weights for `self.model.detector`; used only when
                that attribute is truthy.
            reid_weights: Weights for `self.model.reid`; always loaded.
        """
        # The detector, when present, is loaded before the ReID model.
        if self.model.detector:
            load_weight(self.model.detector, det_weights)
        load_weight(self.model.reid, reid_weights)
			
 
				+
			
 
    def resume_weights(self, weights):
        """Restore model and optimizer state and set `start_epoch`.

        `start_epoch` is set to the value returned by `load_weight`.

        Args:
            weights: Passed unchanged to `load_weight`.
        """
        # support Distill resume weights: restore into the student model
        # when the model exposes one.
        if hasattr(self.model, 'student_model'):
            resume_target = self.model.student_model
        else:
            resume_target = self.model
        self.start_epoch = load_weight(resume_target, weights, self.optimizer)
        logger.debug("Resume weights of epoch {}".format(self.start_epoch))
			
 
				+
			
 
    def train(self, validate=False):
        """Run the training loop from `self.start_epoch` to `cfg.epoch`.

        Args:
            validate (bool): When true, rank 0 (or a single-process run)
                evaluates on `cfg.EvalDataset` every `cfg.snapshot_epoch`
                epochs and after the last epoch.

        Raises:
            AssertionError: If the trainer was not created in 'train' mode.
        """
        assert self.mode == 'train', "Model not in 'train' mode"
        # Becomes True once the validation metrics have been (re)initialised.
        Init_mark = False

        # Sync BN is applied only for multi-card GPU runs whose norm_type is
        # unset or explicitly 'sync_bn'.
        sync_bn = (
            getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
            self.cfg.use_gpu and self._nranks > 1)
        if sync_bn:
            self.model = BaseArch.convert_sync_batchnorm(self.model)

        # `model` is the (possibly wrapped) object used for forward passes;
        # `self.model` stays the object used for EMA and state dicts below.
        model = self.model
        if self.cfg.get('fleet', False):
            model = fleet.distributed_model(model)
            self.optimizer = fleet.distributed_optimizer(self.optimizer)
        elif self._nranks > 1:
            find_unused_parameters = self.cfg[
                'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
            model = paddle.DataParallel(
                self.model, find_unused_parameters=find_unused_parameters)

        # initial fp16
        # `scaler` is only bound when fp16 is enabled; it is only used under
        # the same condition inside the step loop.
        if self.cfg.get('fp16', False):
            scaler = amp.GradScaler(
                enable=self.cfg.use_gpu, init_loss_scaling=1024)

        self.status.update({
            'epoch_id': self.start_epoch,
            'step_id': 0,
            'steps_per_epoch': len(self.loader)
        })

        self.status['batch_time'] = stats.SmoothedValue(
            self.cfg.log_iter, fmt='{avg:.4f}')
        self.status['data_time'] = stats.SmoothedValue(
            self.cfg.log_iter, fmt='{avg:.4f}')
        # NOTE(review): the key is spelled 'training_staus' (sic); presumably
        # callbacks read it under this exact name — confirm before renaming.
        self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)

        if self.cfg.get('print_flops', False):
            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, self.cfg.worker_num)
            self._flops(flops_loader)
        profiler_options = self.cfg.get('profiler_options', None)

        self._compose_callback.on_train_begin(self.status)

        for epoch_id in range(self.start_epoch, self.cfg.epoch):
            self.status['mode'] = 'train'
            self.status['epoch_id'] = epoch_id
            self._compose_callback.on_epoch_begin(self.status)
            self.loader.dataset.set_epoch(epoch_id)
            model.train()
            # iter_tic marks the end of the previous step: data_time measures
            # loading only, batch_time measures loading plus compute.
            iter_tic = time.time()
            for step_id, data in enumerate(self.loader):
                self.status['data_time'].update(time.time() - iter_tic)
                self.status['step_id'] = step_id
                profiler.add_profiler_step(profiler_options)
                self._compose_callback.on_step_begin(self.status)
                # The current epoch is injected into the batch before the
                # forward pass.
                data['epoch_id'] = epoch_id

                if self.cfg.get('fp16', False):
                    with amp.auto_cast(enable=self.cfg.use_gpu):
                        # model forward
                        outputs = model(data)
                        loss = outputs['loss']

                    # model backward
                    scaled_loss = scaler.scale(loss)
                    scaled_loss.backward()
                    # in dygraph mode, optimizer.minimize is equal to optimizer.step
                    scaler.minimize(self.optimizer, scaled_loss)
                else:
                    # model forward
                    outputs = model(data)
                    loss = outputs['loss']
                    # model backward
                    loss.backward()
                    self.optimizer.step()
                # Read the learning rate before the scheduler advances so the
                # logged value is the one used for this step.
                curr_lr = self.optimizer.get_lr()
                self.lr.step()
                if self.cfg.get('unstructured_prune'):
                    self.pruner.step()
                self.optimizer.clear_grad()
                self.status['learning_rate'] = curr_lr

                # Training statistics are updated on rank 0 only (or in a
                # single-process run).
                if self._nranks < 2 or self._local_rank == 0:
                    self.status['training_staus'].update(outputs)

                self.status['batch_time'].update(time.time() - iter_tic)
                self._compose_callback.on_step_end(self.status)
                if self.use_ema:
                    self.ema.update(self.model)
                iter_tic = time.time()

            # apply ema weight on model
            # The raw weights are kept in `weight` and restored at the end of
            # the epoch, so epoch-end callbacks and validation see EMA weights.
            if self.use_ema:
                weight = copy.deepcopy(self.model.state_dict())
                self.model.set_dict(self.ema.apply())
            if self.cfg.get('unstructured_prune'):
                self.pruner.update_params()

            self._compose_callback.on_epoch_end(self.status)

            # Validate on rank 0 (or single process) at every snapshot epoch
            # and after the epoch preceding `self.end_epoch`.
            if validate and (self._nranks < 2 or self._local_rank == 0) \
                    and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
                             or epoch_id == self.end_epoch - 1):
                # The evaluation loader is built lazily, once.
                if not hasattr(self, '_eval_loader'):
                    # build evaluation dataset and loader
                    self._eval_dataset = self.cfg.EvalDataset
                    self._eval_batch_sampler = \
                        paddle.io.BatchSampler(
                            self._eval_dataset,
                            batch_size=self.cfg.EvalReader['batch_size'])
                    # If metric is VOC, need to be set collate_batch=False.
                    if self.cfg.metric == 'VOC':
                        self.cfg['EvalReader']['collate_batch'] = False
                    self._eval_loader = create('EvalReader')(
                        self._eval_dataset,
                        self.cfg.worker_num,
                        batch_sampler=self._eval_batch_sampler)
                # if validation in training is enabled, metrics should be re-init
                # Init_mark makes sure this code will only execute once
                if validate and Init_mark == False:
                    Init_mark = True
                    self._init_metrics(validate=validate)
                    self._reset_metrics()
                with paddle.no_grad():
                    self.status['save_best_model'] = True
                    self._eval_with_loader(self._eval_loader)

            # restore origin weight on model
            if self.use_ema:
                self.model.set_dict(weight)

        self._compose_callback.on_train_end(self.status)
			
 
				+
			
 
				+    def _eval_with_loader(self, loader):
			
 
				+        sample_num = 0
			
 
				+        tic = time.time()
			
 
				+        self._compose_callback.on_epoch_begin(self.status)
			
 
				+        self.status['mode'] = 'eval'
			
 
				+        self.model.eval()
			
 
				+        if self.cfg.get('print_flops', False):
			
 
				+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
			
 
				+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
			
 
				+            self._flops(flops_loader)
			
 
				+        for step_id, data in enumerate(loader):
			
 
				+            self.status['step_id'] = step_id
			
 
				+            self._compose_callback.on_step_begin(self.status)
			
 
				+            # forward
			
 
				+            outs = self.model(data)
			
 
				+
			
 
				+            # update metrics
			
 
				+            for metric in self._metrics:
			
 
				+                metric.update(data, outs)
			
 
				+
			
 
				+            # multi-scale inputs: all inputs have same im_id
			
 
				+            if isinstance(data, typing.Sequence):
			
 
				+                sample_num += data[0]['im_id'].numpy().shape[0]
			
 
				+            else:
			
 
				+                sample_num += data['im_id'].numpy().shape[0]
			
 
				+            self._compose_callback.on_step_end(self.status)
			
 
				+
			
 
				+        self.status['sample_num'] = sample_num
			
 
				+        self.status['cost_time'] = time.time() - tic
			
 
				+
			
 
				+        # accumulate metric to log out
			
 
				+        for metric in self._metrics:
			
 
				+            metric.accumulate()
			
 
				+            metric.log()
			
 
				+        self._compose_callback.on_epoch_end(self.status)
			
 
				+        # reset metric states for metric may performed multiple times
			
 
				+        self._reset_metrics()
			
 
				+
			
 
    def evaluate(self):
        """Evaluate the model on `self.loader` inside `paddle.no_grad()`."""
        with paddle.no_grad():
            eval_loader = self.loader
            self._eval_with_loader(eval_loader)
			
 
				+
			
 
    def predict(self,
                images,
                draw_threshold=0.5,
                output_dir='output',
                save_txt=False):
        """Run inference on ``images`` and save a visualization per image.

        Args:
            images: Forwarded to ``self.dataset.set_images``.
            draw_threshold (float): Forwarded to ``visualize_results`` and,
                when ``save_txt`` is set, to ``save_result``.
            output_dir (str): Directory that receives the rendered images
                (created on demand by ``_get_save_image_name``).
            save_txt (bool): When true, also write a ``.txt`` file next to
                each saved image with the bbox / keypoint results.
        """
        self.dataset.set_images(images)
        loader = create('TestReader')(self.dataset, 0)

        imid2path = self.dataset.get_imid2path()

        anno_file = self.dataset.get_anno()
        clsid2catid, catid2name = get_categories(
            self.cfg.metric, anno_file=anno_file)

        # Run Infer
        self.status['mode'] = 'test'
        self.model.eval()
        if self.cfg.get('print_flops', False):
            flops_loader = create('TestReader')(self.dataset, 0)
            self._flops(flops_loader)
        results = []
        for step_id, data in enumerate(loader):
            self.status['step_id'] = step_id
            # forward
            outs = self.model(data)

            # For sequence inputs the metadata is taken from the first element.
            meta = data[0] if isinstance(data, typing.Sequence) else data
            for key in ['im_shape', 'scale_factor', 'im_id']:
                outs[key] = meta[key]
            # Convert anything exposing ``.numpy()`` so that the
            # post-processing below works on plain arrays.
            for key, value in outs.items():
                if hasattr(value, 'numpy'):
                    outs[key] = value.numpy()
            results.append(outs)
        # sniper
        if type(self.dataset) == SniperCOCODataSet:
            results = self.dataset.anno_cropper.aggregate_chips_detections(
                results)

        for outs in results:
            batch_res = get_infer_results(outs, clsid2catid)
            bbox_num = outs['bbox_num']

            start = 0
            for i, im_id in enumerate(outs['im_id']):
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')
                image = ImageOps.exif_transpose(image)
                self.status['original_image'] = np.array(image.copy())

                # Each image owns the slice [start, end) of the flat
                # per-batch result lists.
                end = start + bbox_num[i]
                bbox_res, mask_res, segm_res, keypoint_res = (
                    batch_res[name][start:end] if name in batch_res else None
                    for name in ('bbox', 'mask', 'segm', 'keypoint'))
                image = visualize_results(
                    image, bbox_res, mask_res, segm_res, keypoint_res,
                    int(im_id), catid2name, draw_threshold)
                self.status['result_image'] = np.array(image.copy())
                if self._compose_callback:
                    self._compose_callback.on_step_end(self.status)
                # save image with detection
                save_name = self._get_save_image_name(output_dir, image_path)
                logger.info("Detection bbox results save in {}".format(
                    save_name))
                image.save(save_name, quality=95)
                if save_txt:
                    save_path = os.path.splitext(save_name)[0] + '.txt'
                    # Use a dedicated name: the enclosing loop iterates over
                    # ``results``, so that name must not be rebound here.
                    txt_result = {"im_id": im_id}
                    if bbox_res:
                        txt_result["bbox_res"] = bbox_res
                    if keypoint_res:
                        txt_result["keypoint_res"] = keypoint_res
                    save_result(save_path, txt_result, catid2name,
                                draw_threshold)
                start = end
			
 
				+
			
 
				+    def _get_save_image_name(self, output_dir, image_path):
			
 
				+        """
			
 
				+        Get save image name from source image path.
			
 
				+        """
			
 
				+        if not os.path.exists(output_dir):
			
 
				+            os.makedirs(output_dir)
			
 
				+        image_name = os.path.split(image_path)[-1]
			
 
				+        name, ext = os.path.splitext(image_name)
			
 
				+        return os.path.join(output_dir, "{}".format(name)) + ext
			
 
				+
			
 
    def _get_infer_cfg_and_input_spec(self, save_dir, prune_input=True):
        """Dump ``infer_cfg.yml`` into ``save_dir`` and build the input spec.

        Args:
            save_dir (str): Directory that receives ``infer_cfg.yml``.
            prune_input (bool): When true, the model is converted with
                ``paddle.jit.to_static`` and the spec is passed through
                ``_prune_input_spec``; otherwise no static model is built.

        Returns:
            tuple: ``(static_model, pruned_input_spec)``; ``static_model`` is
            ``None`` when ``prune_input`` is false.
        """
        cfg = self.cfg
        net = self.model

        if cfg.architecture in MOT_ARCH:
            reader_key = 'TestMOTReader'
        else:
            reader_key = 'TestReader'
        reader_cfg = cfg[reader_key]

        image_shape = None
        if 'inputs_def' in reader_cfg:
            image_shape = reader_cfg['inputs_def'].get('image_shape', None)
        # set image_shape=[None, 3, -1, -1] as default
        if image_shape is None:
            image_shape = [None, 3, -1, -1]

        # A 3-element shape has no batch dimension yet: prepend a dynamic one.
        # Otherwise the leading dimension is reused for the auxiliary inputs.
        if len(image_shape) == 3:
            image_shape = [None] + image_shape
            batch_dim = None
        else:
            batch_dim = image_shape[0]
        im_shape = [batch_dim, 2]
        scale_factor = [batch_dim, 2]

        if hasattr(net, 'deploy'):
            net.deploy = True
        if hasattr(net, 'fuse_norm'):
            # NOTE(review): this always reads 'TestReader', even when the MOT
            # reader was selected above -- confirm that is intended.
            net.fuse_norm = cfg['TestReader'].get('fuse_normalize', False)

        # Save infer cfg
        cfg_path = os.path.join(save_dir, 'infer_cfg.yml')
        _dump_infer_config(cfg, cfg_path, image_shape, net)

        spec = {
            "image": InputSpec(
                shape=image_shape, name='image'),
            "im_shape": InputSpec(
                shape=im_shape, name='im_shape'),
            "scale_factor": InputSpec(
                shape=scale_factor, name='scale_factor')
        }
        if cfg.architecture == 'DeepSORT':
            spec["crops"] = InputSpec(shape=[None, 3, 192, 64], name='crops')
        input_spec = [spec]

        if prune_input:
            static_model = paddle.jit.to_static(net, input_spec=input_spec)
            # NOTE: dy2st do not pruned program, but jit.save will prune program
            # input spec, prune input spec here and save with pruned input spec
            forward = static_model.forward
            pruned_input_spec = _prune_input_spec(
                input_spec, forward.main_program, forward.outputs)
        else:
            static_model, pruned_input_spec = None, input_spec

        # TODO: Hard code, delete it when support prune input_spec.
        if cfg.architecture == 'PicoDet':
            pruned_input_spec = [{
                "image": InputSpec(
                    shape=image_shape, name='image')
            }]

        return static_model, pruned_input_spec
			
 
				+
			
 
    def export(self, output_dir='output_inference'):
        """Export the model for inference under ``output_dir/<config name>``.

        The sub-directory name is the base name of ``self.cfg.filename``
        without its extension. The model is written with
        ``self.cfg.slim.save_quantized_model`` when the config contains
        ``slim`` and ``slim_type`` is ``'QAT'``, and with ``paddle.jit.save``
        otherwise.
        """
        self.model.eval()
        cfg_basename = os.path.split(self.cfg.filename)[-1]
        model_name = os.path.splitext(cfg_basename)[0]
        save_dir = os.path.join(output_dir, model_name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        static_model, pruned_input_spec = self._get_infer_cfg_and_input_spec(
            save_dir)
        model_prefix = os.path.join(save_dir, 'model')

        # dy2st and save model
        is_qat = 'slim' in self.cfg and self.cfg['slim_type'] == 'QAT'
        if is_qat:
            self.cfg.slim.save_quantized_model(
                self.model, model_prefix, input_spec=pruned_input_spec)
        else:
            paddle.jit.save(
                static_model, model_prefix, input_spec=pruned_input_spec)
        logger.info("Export model and saved in {}".format(save_dir))
			
 
				+
			
 
    def post_quant(self, output_dir='output_inference'):
        """Feed batches from ``self.loader`` through the model, then save it
        with ``self.cfg.slim.save_quantized_model``.

        The model is written under ``output_dir/<config name>``, where the
        name is the base name of ``self.cfg.filename`` without its extension.
        """
        cfg_basename = os.path.split(self.cfg.filename)[-1]
        model_name = os.path.splitext(cfg_basename)[0]
        save_dir = os.path.join(output_dir, model_name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # NOTE(review): the stop test runs after the forward pass, so
        # ``quant_batch_num + 1`` batches are fed at most -- confirm whether
        # that extra batch is intended.
        for batch_idx, batch in enumerate(self.loader):
            self.model(batch)
            if batch_idx == int(self.cfg.get('quant_batch_num', 10)):
                break

        # TODO: support prune input_spec
        _, pruned_input_spec = self._get_infer_cfg_and_input_spec(
            save_dir, prune_input=False)

        model_prefix = os.path.join(save_dir, 'model')
        self.cfg.slim.save_quantized_model(
            self.model, model_prefix, input_spec=pruned_input_spec)
        logger.info("Export Post-Quant model and saved in {}".format(save_dir))
			
 
				+
			
 
    def _flops(self, loader):
        """Log the model FLOPs measured on the first sample of ``loader``.

        Only a warning is logged (and nothing is computed) when ``paddleslim``
        cannot be imported or when ``loader`` yields no batch.

        Args:
            loader: Iterable of batches; the first batch must provide the
                ``'image'``, ``'im_shape'`` and ``'scale_factor'`` entries.
        """
        self.model.eval()
        try:
            import paddleslim  # noqa: F401 -- availability check only
        except Exception:
            logger.warning(
                'Unable to calculate flops, please install paddleslim, for example: `pip install paddleslim`'
            )
            return

        from paddleslim.analysis import dygraph_flops

        # Only the first batch is needed; an empty loader used to crash with
        # a TypeError when indexing ``None`` below.
        input_data = next(iter(loader), None)
        if input_data is None:
            logger.warning(
                'Unable to calculate flops, the data loader is empty.')
            return

        # Keep a single sample and restore the batch dimension.
        image = input_data['image'][0].unsqueeze(0)
        input_spec = [{
            "image": image,
            "im_shape": input_data['im_shape'][0].unsqueeze(0),
            "scale_factor": input_data['scale_factor'][0].unsqueeze(0)
        }]
        gflops = dygraph_flops(self.model, input_spec) / (1000**3)
        logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format(
            gflops, image.shape))
			
--- a/paddlers/models/ppdet/metrics/__init__.py
+++ b/paddlers/models/ppdet/metrics/__init__.py
@@ -0,0 +1,29 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import metrics
			
 
				+from . import keypoint_metrics
			
 
				+
			
 
				+from .metrics import *
			
 
				+from .keypoint_metrics import *
			
 
				+
			
 
				+__all__ = metrics.__all__ + keypoint_metrics.__all__
			
 
				+
			
 
				+from . import mot_metrics
			
 
				+from .mot_metrics import *
			
 
				+__all__ = metrics.__all__ + mot_metrics.__all__
			
 
				+
			
 
				+from . import mcmot_metrics
			
 
				+from .mcmot_metrics import *
			
 
				+__all__ = metrics.__all__ + mcmot_metrics.__all__
			
--- a/paddlers/models/ppdet/metrics/coco_utils.py
+++ b/paddlers/models/ppdet/metrics/coco_utils.py
@@ -0,0 +1,184 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import numpy as np
			
 
				+import itertools
			
 
				+
			
 
				+from paddlers.models.ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res
			
 
				+from paddlers.models.ppdet.metrics.map_utils import draw_pr_curve
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+
			
 
				+def get_infer_results(outs, catid, bias=0):
			
 
				+    """
			
 
				+    Get result at the stage of inference.
			
 
				+    The output format is dictionary containing bbox or mask result.
			
 
				+
			
 
				+    For example, bbox result is a list and each element contains
			
 
				+    image_id, category_id, bbox and score.
			
 
				+    """
			
 
				+    if outs is None or len(outs) == 0:
			
 
				+        raise ValueError(
			
 
				+            'The number of valid detection result if zero. Please use reasonable model and check input data.'
			
 
				+        )
			
 
				+
			
 
				+    im_id = outs['im_id']
			
 
				+
			
 
				+    infer_res = {}
			
 
				+    if 'bbox' in outs:
			
 
				+        if len(outs['bbox']) > 0 and len(outs['bbox'][0]) > 6:
			
 
				+            infer_res['bbox'] = get_det_poly_res(
			
 
				+                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
			
 
				+        else:
			
 
				+            infer_res['bbox'] = get_det_res(
			
 
				+                outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias)
			
 
				+
			
 
				+    if 'mask' in outs:
			
 
				+        # mask post process
			
 
				+        infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'],
			
 
				+                                        outs['bbox_num'], im_id, catid)
			
 
				+
			
 
				+    if 'segm' in outs:
			
 
				+        infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid)
			
 
				+
			
 
				+    if 'keypoint' in outs:
			
 
				+        infer_res['keypoint'] = get_keypoint_res(outs, im_id)
			
 
				+        outs['bbox_num'] = [len(infer_res['keypoint'])]
			
 
				+
			
 
				+    return infer_res
			
 
				+
			
 
				+
			
 
				+def cocoapi_eval(jsonfile,
			
 
				+                 style,
			
 
				+                 coco_gt=None,
			
 
				+                 anno_file=None,
			
 
				+                 max_dets=(100, 300, 1000),
			
 
				+                 classwise=False,
			
 
				+                 sigmas=None,
			
 
				+                 use_area=True):
			
 
				+    """
			
 
				+    Args:
			
 
				+        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
			
 
				+        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
			
 
				+        coco_gt (str): Whether to load COCOAPI through anno_file,
			
 
				+                 eg: coco_gt = COCO(anno_file)
			
 
				+        anno_file (str): COCO annotations file.
			
 
				+        max_dets (tuple): COCO evaluation maxDets.
			
 
				+        classwise (bool): Whether per-category AP and draw P-R Curve or not.
			
 
				+        sigmas (nparray): keypoint labelling sigmas.
			
 
				+        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
			
 
				+                         do not have 'area', please set use_area=False.
			
 
				+    """
			
 
				+    assert coco_gt != None or anno_file != None
			
 
				+    if style == 'keypoints_crowd':
			
 
				+        #please install xtcocotools==1.6
			
 
				+        from xtcocotools.coco import COCO
			
 
				+        from xtcocotools.cocoeval import COCOeval
			
 
				+    else:
			
 
				+        from pycocotools.coco import COCO
			
 
				+        from pycocotools.cocoeval import COCOeval
			
 
				+
			
 
				+    if coco_gt == None:
			
 
				+        coco_gt = COCO(anno_file)
			
 
				+    logger.info("Start evaluate...")
			
 
				+    coco_dt = coco_gt.loadRes(jsonfile)
			
 
				+    if style == 'proposal':
			
 
				+        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
			
 
				+        coco_eval.params.useCats = 0
			
 
				+        coco_eval.params.maxDets = list(max_dets)
			
 
				+    elif style == 'keypoints_crowd':
			
 
				+        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
			
 
				+    else:
			
 
				+        coco_eval = COCOeval(coco_gt, coco_dt, style)
			
 
				+    coco_eval.evaluate()
			
 
				+    coco_eval.accumulate()
			
 
				+    coco_eval.summarize()
			
 
				+    if classwise:
			
 
				+        # Compute per-category AP and PR curve
			
 
				+        try:
			
 
				+            from terminaltables import AsciiTable
			
 
				+        except Exception as e:
			
 
				+            logger.error(
			
 
				+                'terminaltables not found, plaese install terminaltables. '
			
 
				+                'for example: `pip install terminaltables`.')
			
 
				+            raise e
			
 
				+        precisions = coco_eval.eval['precision']
			
 
				+        cat_ids = coco_gt.getCatIds()
			
 
				+        # precision: (iou, recall, cls, area range, max dets)
			
 
				+        assert len(cat_ids) == precisions.shape[2]
			
 
				+        results_per_category = []
			
 
				+        for idx, catId in enumerate(cat_ids):
			
 
				+            # area range index 0: all area ranges
			
 
				+            # max dets index -1: typically 100 per image
			
 
				+            nm = coco_gt.loadCats(catId)[0]
			
 
				+            precision = precisions[:, :, idx, 0, -1]
			
 
				+            precision = precision[precision > -1]
			
 
				+            if precision.size:
			
 
				+                ap = np.mean(precision)
			
 
				+            else:
			
 
				+                ap = float('nan')
			
 
				+            results_per_category.append(
			
 
				+                (str(nm["name"]), '{:0.3f}'.format(float(ap))))
			
 
				+            pr_array = precisions[0, :, idx, 0, 2]
			
 
				+            recall_array = np.arange(0.0, 1.01, 0.01)
			
 
				+            draw_pr_curve(
			
 
				+                pr_array,
			
 
				+                recall_array,
			
 
				+                out_dir=style + '_pr_curve',
			
 
				+                file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))
			
 
				+
			
 
				+        num_columns = min(6, len(results_per_category) * 2)
			
 
				+        results_flatten = list(itertools.chain(*results_per_category))
			
 
				+        headers = ['category', 'AP'] * (num_columns // 2)
			
 
				+        results_2d = itertools.zip_longest(
			
 
				+            *[results_flatten[i::num_columns] for i in range(num_columns)])
			
 
				+        table_data = [headers]
			
 
				+        table_data += [result for result in results_2d]
			
 
				+        table = AsciiTable(table_data)
			
 
				+        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
			
 
				+        logger.info("per-category PR curve has output to {} folder.".format(
			
 
				+            style + '_pr_curve'))
			
 
				+    # flush coco evaluation result
			
 
				+    sys.stdout.flush()
			
 
				+    return coco_eval.stats
			
 
				+
			
 
				+
			
 
				+def json_eval_results(metric, json_directory, dataset):
			
 
				+    """
			
 
				+    cocoapi eval with already exists proposal.json, bbox.json or mask.json
			
 
				+    """
			
 
				+    assert metric == 'COCO'
			
 
				+    anno_file = dataset.get_anno()
			
 
				+    json_file_list = ['proposal.json', 'bbox.json', 'mask.json']
			
 
				+    if json_directory:
			
 
				+        assert os.path.exists(
			
 
				+            json_directory), "The json directory:{} does not exist".format(
			
 
				+                json_directory)
			
 
				+        for k, v in enumerate(json_file_list):
			
 
				+            json_file_list[k] = os.path.join(str(json_directory), v)
			
 
				+
			
 
				+    coco_eval_style = ['proposal', 'bbox', 'segm']
			
 
				+    for i, v_json in enumerate(json_file_list):
			
 
				+        if os.path.exists(v_json):
			
 
				+            cocoapi_eval(v_json, coco_eval_style[i], anno_file=anno_file)
			
 
				+        else:
			
 
				+            logger.info("{} not exists!".format(v_json))
			
--- a/paddlers/models/ppdet/metrics/json_results.py
+++ b/paddlers/models/ppdet/metrics/json_results.py
@@ -0,0 +1,149 @@
 
				+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+import six
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
			
 
				+    det_res = []
			
 
				+    k = 0
			
 
				+    for i in range(len(bbox_nums)):
			
 
				+        cur_image_id = int(image_id[i][0])
			
 
				+        det_nums = bbox_nums[i]
			
 
				+        for j in range(det_nums):
			
 
				+            dt = bboxes[k]
			
 
				+            k = k + 1
			
 
				+            num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
			
 
				+            if int(num_id) < 0:
			
 
				+                continue
			
 
				+            category_id = label_to_cat_id_map[int(num_id)]
			
 
				+            w = xmax - xmin + bias
			
 
				+            h = ymax - ymin + bias
			
 
				+            bbox = [xmin, ymin, w, h]
			
 
				+            dt_res = {
			
 
				+                'image_id': cur_image_id,
			
 
				+                'category_id': category_id,
			
 
				+                'bbox': bbox,
			
 
				+                'score': score
			
 
				+            }
			
 
				+            det_res.append(dt_res)
			
 
				+    return det_res
			
 
				+
			
 
				+
			
 
				+def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
			
 
				+    det_res = []
			
 
				+    k = 0
			
 
				+    for i in range(len(bbox_nums)):
			
 
				+        cur_image_id = int(image_id[i][0])
			
 
				+        det_nums = bbox_nums[i]
			
 
				+        for j in range(det_nums):
			
 
				+            dt = bboxes[k]
			
 
				+            k = k + 1
			
 
				+            num_id, score, x1, y1, x2, y2, x3, y3, x4, y4 = dt.tolist()
			
 
				+            if int(num_id) < 0:
			
 
				+                continue
			
 
				+            category_id = label_to_cat_id_map[int(num_id)]
			
 
				+            rbox = [x1, y1, x2, y2, x3, y3, x4, y4]
			
 
				+            dt_res = {
			
 
				+                'image_id': cur_image_id,
			
 
				+                'category_id': category_id,
			
 
				+                'bbox': rbox,
			
 
				+                'score': score
			
 
				+            }
			
 
				+            det_res.append(dt_res)
			
 
				+    return det_res
			
 
				+
			
 
				+
			
 
				+def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
			
 
				+    import pycocotools.mask as mask_util
			
 
				+    seg_res = []
			
 
				+    k = 0
			
 
				+    for i in range(len(mask_nums)):
			
 
				+        cur_image_id = int(image_id[i][0])
			
 
				+        det_nums = mask_nums[i]
			
 
				+        for j in range(det_nums):
			
 
				+            mask = masks[k].astype(np.uint8)
			
 
				+            score = float(bboxes[k][1])
			
 
				+            label = int(bboxes[k][0])
			
 
				+            k = k + 1
			
 
				+            if label == -1:
			
 
				+                continue
			
 
				+            cat_id = label_to_cat_id_map[label]
			
 
				+            rle = mask_util.encode(
			
 
				+                np.array(
			
 
				+                    mask[:, :, None], order="F", dtype="uint8"))[0]
			
 
				+            if six.PY3:
			
 
				+                if 'counts' in rle:
			
 
				+                    rle['counts'] = rle['counts'].decode("utf8")
			
 
				+            sg_res = {
			
 
				+                'image_id': cur_image_id,
			
 
				+                'category_id': cat_id,
			
 
				+                'segmentation': rle,
			
 
				+                'score': score
			
 
				+            }
			
 
				+            seg_res.append(sg_res)
			
 
				+    return seg_res
			
 
				+
			
 
				+
			
 
				+def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
			
 
				+    import pycocotools.mask as mask_util
			
 
				+    segm_res = []
			
 
				+    # for each batch
			
 
				+    segms = results['segm'].astype(np.uint8)
			
 
				+    clsid_labels = results['cate_label']
			
 
				+    clsid_scores = results['cate_score']
			
 
				+    lengths = segms.shape[0]
			
 
				+    im_id = int(image_id[0][0])
			
 
				+    if lengths == 0 or segms is None:
			
 
				+        return None
			
 
				+    # for each sample
			
 
				+    for i in range(lengths - 1):
			
 
				+        clsid = int(clsid_labels[i])
			
 
				+        catid = num_id_to_cat_id_map[clsid]
			
 
				+        score = float(clsid_scores[i])
			
 
				+        mask = segms[i]
			
 
				+        segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
			
 
				+        segm['counts'] = segm['counts'].decode('utf8')
			
 
				+        coco_res = {
			
 
				+            'image_id': im_id,
			
 
				+            'category_id': catid,
			
 
				+            'segmentation': segm,
			
 
				+            'score': score
			
 
				+        }
			
 
				+        segm_res.append(coco_res)
			
 
				+    return segm_res
			
 
				+
			
 
				+
			
 
				+def get_keypoint_res(results, im_id):
			
 
				+    anns = []
			
 
				+    preds = results['keypoint']
			
 
				+    for idx in range(im_id.shape[0]):
			
 
				+        image_id = im_id[idx].item()
			
 
				+        kpts, scores = preds[idx]
			
 
				+        for kpt, score in zip(kpts, scores):
			
 
				+            kpt = kpt.flatten()
			
 
				+            ann = {
			
 
				+                'image_id': image_id,
			
 
				+                'category_id': 1,  # XXX hard code
			
 
				+                'keypoints': kpt.tolist(),
			
 
				+                'score': float(score)
			
 
				+            }
			
 
				+            x = kpt[0::3]
			
 
				+            y = kpt[1::3]
			
 
				+            x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(
			
 
				+                y).item(), np.max(y).item()
			
 
				+            ann['area'] = (x1 - x0) * (y1 - y0)
			
 
				+            ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
			
 
				+            anns.append(ann)
			
 
				+    return anns
			
--- a/paddlers/models/ppdet/metrics/keypoint_metrics.py
+++ b/paddlers/models/ppdet/metrics/keypoint_metrics.py
@@ -0,0 +1,401 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os
			
 
				+import json
			
 
				+from collections import defaultdict, OrderedDict
			
 
				+import numpy as np
			
 
				+from pycocotools.coco import COCO
			
 
				+from pycocotools.cocoeval import COCOeval
			
 
				+from ..modeling.keypoint_utils import oks_nms
			
 
				+from scipy.io import loadmat, savemat
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
			
 
				+
			
 
				+
			
 
				+class KeyPointTopDownCOCOEval(object):
			
 
				+    """refer to
			
 
				+        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
			
 
				+        Copyright (c) Microsoft, under the MIT License.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 anno_file,
			
 
				+                 num_samples,
			
 
				+                 num_joints,
			
 
				+                 output_eval,
			
 
				+                 iou_type='keypoints',
			
 
				+                 in_vis_thre=0.2,
			
 
				+                 oks_thre=0.9,
			
 
				+                 save_prediction_only=False):
			
 
				+        super(KeyPointTopDownCOCOEval, self).__init__()
			
 
				+        self.coco = COCO(anno_file)
			
 
				+        self.num_samples = num_samples
			
 
				+        self.num_joints = num_joints
			
 
				+        self.iou_type = iou_type
			
 
				+        self.in_vis_thre = in_vis_thre
			
 
				+        self.oks_thre = oks_thre
			
 
				+        self.output_eval = output_eval
			
 
				+        self.res_file = os.path.join(output_eval, "keypoints_results.json")
			
 
				+        self.save_prediction_only = save_prediction_only
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.results = {
			
 
				+            'all_preds': np.zeros(
			
 
				+                (self.num_samples, self.num_joints, 3), dtype=np.float32),
			
 
				+            'all_boxes': np.zeros((self.num_samples, 6)),
			
 
				+            'image_path': []
			
 
				+        }
			
 
				+        self.eval_results = {}
			
 
				+        self.idx = 0
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        kpts, _ = outputs['keypoint'][0]
			
 
				+
			
 
				+        num_images = inputs['image'].shape[0]
			
 
				+        self.results['all_preds'][self.idx:self.idx + num_images, :, 0:
			
 
				+                                  3] = kpts[:, :, 0:3]
			
 
				+        self.results['all_boxes'][self.idx:self.idx + num_images, 0:
			
 
				+                                  2] = inputs['center'].numpy()[:, 0:2]
			
 
				+        self.results['all_boxes'][self.idx:self.idx + num_images, 2:
			
 
				+                                  4] = inputs['scale'].numpy()[:, 0:2]
			
 
				+        self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod(
			
 
				+            inputs['scale'].numpy() * 200, 1)
			
 
				+        self.results['all_boxes'][self.idx:self.idx + num_images,
			
 
				+                                  5] = np.squeeze(inputs['score'].numpy())
			
 
				+        self.results['image_path'].extend(inputs['im_id'].numpy())
			
 
				+
			
 
				+        self.idx += num_images
			
 
				+
			
 
				+    def _write_coco_keypoint_results(self, keypoints):
			
 
				+        data_pack = [{
			
 
				+            'cat_id': 1,
			
 
				+            'cls': 'person',
			
 
				+            'ann_type': 'keypoints',
			
 
				+            'keypoints': keypoints
			
 
				+        }]
			
 
				+        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
			
 
				+        if not os.path.exists(self.output_eval):
			
 
				+            os.makedirs(self.output_eval)
			
 
				+        with open(self.res_file, 'w') as f:
			
 
				+            json.dump(results, f, sort_keys=True, indent=4)
			
 
				+            logger.info(f'The keypoint result is saved to {self.res_file}.')
			
 
				+        try:
			
 
				+            json.load(open(self.res_file))
			
 
				+        except Exception:
			
 
				+            content = []
			
 
				+            with open(self.res_file, 'r') as f:
			
 
				+                for line in f:
			
 
				+                    content.append(line)
			
 
				+            content[-1] = ']'
			
 
				+            with open(self.res_file, 'w') as f:
			
 
				+                for c in content:
			
 
				+                    f.write(c)
			
 
				+
			
 
				+    def _coco_keypoint_results_one_category_kernel(self, data_pack):
			
 
				+        cat_id = data_pack['cat_id']
			
 
				+        keypoints = data_pack['keypoints']
			
 
				+        cat_results = []
			
 
				+
			
 
				+        for img_kpts in keypoints:
			
 
				+            if len(img_kpts) == 0:
			
 
				+                continue
			
 
				+
			
 
				+            _key_points = np.array(
			
 
				+                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
			
 
				+            _key_points = _key_points.reshape(_key_points.shape[0], -1)
			
 
				+
			
 
				+            result = [{
			
 
				+                'image_id': img_kpts[k]['image'],
			
 
				+                'category_id': cat_id,
			
 
				+                'keypoints': _key_points[k].tolist(),
			
 
				+                'score': img_kpts[k]['score'],
			
 
				+                'center': list(img_kpts[k]['center']),
			
 
				+                'scale': list(img_kpts[k]['scale'])
			
 
				+            } for k in range(len(img_kpts))]
			
 
				+            cat_results.extend(result)
			
 
				+
			
 
				+        return cat_results
			
 
				+
			
 
				+    def get_final_results(self, preds, all_boxes, img_path):
			
 
				+        _kpts = []
			
 
				+        for idx, kpt in enumerate(preds):
			
 
				+            _kpts.append({
			
 
				+                'keypoints': kpt,
			
 
				+                'center': all_boxes[idx][0:2],
			
 
				+                'scale': all_boxes[idx][2:4],
			
 
				+                'area': all_boxes[idx][4],
			
 
				+                'score': all_boxes[idx][5],
			
 
				+                'image': int(img_path[idx])
			
 
				+            })
			
 
				+        # image x person x (keypoints)
			
 
				+        kpts = defaultdict(list)
			
 
				+        for kpt in _kpts:
			
 
				+            kpts[kpt['image']].append(kpt)
			
 
				+
			
 
				+        # rescoring and oks nms
			
 
				+        num_joints = preds.shape[1]
			
 
				+        in_vis_thre = self.in_vis_thre
			
 
				+        oks_thre = self.oks_thre
			
 
				+        oks_nmsed_kpts = []
			
 
				+        for img in kpts.keys():
			
 
				+            img_kpts = kpts[img]
			
 
				+            for n_p in img_kpts:
			
 
				+                box_score = n_p['score']
			
 
				+                kpt_score = 0
			
 
				+                valid_num = 0
			
 
				+                for n_jt in range(0, num_joints):
			
 
				+                    t_s = n_p['keypoints'][n_jt][2]
			
 
				+                    if t_s > in_vis_thre:
			
 
				+                        kpt_score = kpt_score + t_s
			
 
				+                        valid_num = valid_num + 1
			
 
				+                if valid_num != 0:
			
 
				+                    kpt_score = kpt_score / valid_num
			
 
				+                # rescoring
			
 
				+                n_p['score'] = kpt_score * box_score
			
 
				+
			
 
				+            keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
			
 
				+                           oks_thre)
			
 
				+
			
 
				+            if len(keep) == 0:
			
 
				+                oks_nmsed_kpts.append(img_kpts)
			
 
				+            else:
			
 
				+                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
			
 
				+
			
 
				+        self._write_coco_keypoint_results(oks_nmsed_kpts)
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        self.get_final_results(self.results['all_preds'],
			
 
				+                               self.results['all_boxes'],
			
 
				+                               self.results['image_path'])
			
 
				+        if self.save_prediction_only:
			
 
				+            logger.info(f'The keypoint result is saved to {self.res_file} '
			
 
				+                        'and do not evaluate the mAP.')
			
 
				+            return
			
 
				+        coco_dt = self.coco.loadRes(self.res_file)
			
 
				+        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
			
 
				+        coco_eval.params.useSegm = None
			
 
				+        coco_eval.evaluate()
			
 
				+        coco_eval.accumulate()
			
 
				+        coco_eval.summarize()
			
 
				+
			
 
				+        keypoint_stats = []
			
 
				+        for ind in range(len(coco_eval.stats)):
			
 
				+            keypoint_stats.append((coco_eval.stats[ind]))
			
 
				+        self.eval_results['keypoint'] = keypoint_stats
			
 
				+
			
 
				+    def log(self):
			
 
				+        if self.save_prediction_only:
			
 
				+            return
			
 
				+        stats_names = [
			
 
				+            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
			
 
				+            'AR .75', 'AR (M)', 'AR (L)'
			
 
				+        ]
			
 
				+        num_values = len(stats_names)
			
 
				+        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
			
 
				+        print('|---' * (num_values + 1) + '|')
			
 
				+
			
 
				+        print(' '.join([
			
 
				+            '| {:.3f}'.format(value) for value in self.eval_results['keypoint']
			
 
				+        ]) + ' |')
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.eval_results
			
 
				+
			
 
				+
			
 
				+class KeyPointTopDownMPIIEval(object):
			
 
				+    def __init__(self,
			
 
				+                 anno_file,
			
 
				+                 num_samples,
			
 
				+                 num_joints,
			
 
				+                 output_eval,
			
 
				+                 oks_thre=0.9,
			
 
				+                 save_prediction_only=False):
			
 
				+        super(KeyPointTopDownMPIIEval, self).__init__()
			
 
				+        self.ann_file = anno_file
			
 
				+        self.res_file = os.path.join(output_eval, "keypoints_results.json")
			
 
				+        self.save_prediction_only = save_prediction_only
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.results = []
			
 
				+        self.eval_results = {}
			
 
				+        self.idx = 0
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        kpts, _ = outputs['keypoint'][0]
			
 
				+
			
 
				+        num_images = inputs['image'].shape[0]
			
 
				+        results = {}
			
 
				+        results['preds'] = kpts[:, :, 0:3]
			
 
				+        results['boxes'] = np.zeros((num_images, 6))
			
 
				+        results['boxes'][:, 0:2] = inputs['center'].numpy()[:, 0:2]
			
 
				+        results['boxes'][:, 2:4] = inputs['scale'].numpy()[:, 0:2]
			
 
				+        results['boxes'][:, 4] = np.prod(inputs['scale'].numpy() * 200, 1)
			
 
				+        results['boxes'][:, 5] = np.squeeze(inputs['score'].numpy())
			
 
				+        results['image_path'] = inputs['image_file']
			
 
				+
			
 
				+        self.results.append(results)
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        self._mpii_keypoint_results_save()
			
 
				+        if self.save_prediction_only:
			
 
				+            logger.info(f'The keypoint result is saved to {self.res_file} '
			
 
				+                        'and do not evaluate the mAP.')
			
 
				+            return
			
 
				+
			
 
				+        self.eval_results = self.evaluate(self.results)
			
 
				+
			
 
				+    def _mpii_keypoint_results_save(self):
			
 
				+        results = []
			
 
				+        for res in self.results:
			
 
				+            if len(res) == 0:
			
 
				+                continue
			
 
				+            result = [{
			
 
				+                'preds': res['preds'][k].tolist(),
			
 
				+                'boxes': res['boxes'][k].tolist(),
			
 
				+                'image_path': res['image_path'][k],
			
 
				+            } for k in range(len(res))]
			
 
				+            results.extend(result)
			
 
				+        with open(self.res_file, 'w') as f:
			
 
				+            json.dump(results, f, sort_keys=True, indent=4)
			
 
				+            logger.info(f'The keypoint result is saved to {self.res_file}.')
			
 
				+
			
 
				+    def log(self):
			
 
				+        if self.save_prediction_only:
			
 
				+            return
			
 
				+        for item, value in self.eval_results.items():
			
 
				+            print("{} : {}".format(item, value))
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.eval_results
			
 
				+
			
 
				+    def evaluate(self, outputs, savepath=None):
			
 
				+        """Evaluate PCKh for MPII dataset. refer to
			
 
				+        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
			
 
				+        Copyright (c) Microsoft, under the MIT License.
			
 
				+
			
 
				+        Args:
			
 
				+            outputs(list(preds, boxes)):
			
 
				+
			
 
				+                * preds (np.ndarray[N,K,3]): The first two dimensions are
			
 
				+                  coordinates, score is the third dimension of the array.
			
 
				+                * boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
			
 
				+                  , scale[1],area, score]
			
 
				+
			
 
				+        Returns:
			
 
				+            dict: PCKh for each joint
			
 
				+        """
			
 
				+
			
 
				+        kpts = []
			
 
				+        for output in outputs:
			
 
				+            preds = output['preds']
			
 
				+            batch_size = preds.shape[0]
			
 
				+            for i in range(batch_size):
			
 
				+                kpts.append({'keypoints': preds[i]})
			
 
				+
			
 
				+        preds = np.stack([kpt['keypoints'] for kpt in kpts])
			
 
				+
			
 
				+        # convert 0-based index to 1-based index,
			
 
				+        # and get the first two dimensions.
			
 
				+        preds = preds[..., :2] + 1.0
			
 
				+
			
 
				+        if savepath is not None:
			
 
				+            pred_file = os.path.join(savepath, 'pred.mat')
			
 
				+            savemat(pred_file, mdict={'preds': preds})
			
 
				+
			
 
				+        SC_BIAS = 0.6
			
 
				+        threshold = 0.5
			
 
				+
			
 
				+        gt_file = os.path.join(
			
 
				+            os.path.dirname(self.ann_file), 'mpii_gt_val.mat')
			
 
				+        gt_dict = loadmat(gt_file)
			
 
				+        dataset_joints = gt_dict['dataset_joints']
			
 
				+        jnt_missing = gt_dict['jnt_missing']
			
 
				+        pos_gt_src = gt_dict['pos_gt_src']
			
 
				+        headboxes_src = gt_dict['headboxes_src']
			
 
				+
			
 
				+        pos_pred_src = np.transpose(preds, [1, 2, 0])
			
 
				+
			
 
				+        head = np.where(dataset_joints == 'head')[1][0]
			
 
				+        lsho = np.where(dataset_joints == 'lsho')[1][0]
			
 
				+        lelb = np.where(dataset_joints == 'lelb')[1][0]
			
 
				+        lwri = np.where(dataset_joints == 'lwri')[1][0]
			
 
				+        lhip = np.where(dataset_joints == 'lhip')[1][0]
			
 
				+        lkne = np.where(dataset_joints == 'lkne')[1][0]
			
 
				+        lank = np.where(dataset_joints == 'lank')[1][0]
			
 
				+
			
 
				+        rsho = np.where(dataset_joints == 'rsho')[1][0]
			
 
				+        relb = np.where(dataset_joints == 'relb')[1][0]
			
 
				+        rwri = np.where(dataset_joints == 'rwri')[1][0]
			
 
				+        rkne = np.where(dataset_joints == 'rkne')[1][0]
			
 
				+        rank = np.where(dataset_joints == 'rank')[1][0]
			
 
				+        rhip = np.where(dataset_joints == 'rhip')[1][0]
			
 
				+
			
 
				+        jnt_visible = 1 - jnt_missing
			
 
				+        uv_error = pos_pred_src - pos_gt_src
			
 
				+        uv_err = np.linalg.norm(uv_error, axis=1)
			
 
				+        headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
			
 
				+        headsizes = np.linalg.norm(headsizes, axis=0)
			
 
				+        headsizes *= SC_BIAS
			
 
				+        scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
			
 
				+        scaled_uv_err = uv_err / scale
			
 
				+        scaled_uv_err = scaled_uv_err * jnt_visible
			
 
				+        jnt_count = np.sum(jnt_visible, axis=1)
			
 
				+        less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
			
 
				+        PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count
			
 
				+
			
 
				+        # save
			
 
				+        rng = np.arange(0, 0.5 + 0.01, 0.01)
			
 
				+        pckAll = np.zeros((len(rng), 16), dtype=np.float32)
			
 
				+
			
 
				+        for r, threshold in enumerate(rng):
			
 
				+            less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
			
 
				+            pckAll[r, :] = 100. * np.sum(less_than_threshold,
			
 
				+                                         axis=1) / jnt_count
			
 
				+
			
 
				+        PCKh = np.ma.array(PCKh, mask=False)
			
 
				+        PCKh.mask[6:8] = True
			
 
				+
			
 
				+        jnt_count = np.ma.array(jnt_count, mask=False)
			
 
				+        jnt_count.mask[6:8] = True
			
 
				+        jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
			
 
				+
			
 
				+        name_value = [  #noqa
			
 
				+            ('Head', PCKh[head]),
			
 
				+            ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
			
 
				+            ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
			
 
				+            ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
			
 
				+            ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
			
 
				+            ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
			
 
				+            ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
			
 
				+            ('PCKh', np.sum(PCKh * jnt_ratio)),
			
 
				+            ('PCKh@0.1', np.sum(pckAll[11, :] * jnt_ratio))
			
 
				+        ]
			
 
				+        name_value = OrderedDict(name_value)
			
 
				+
			
 
				+        return name_value
			
 
				+
			
 
				+    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
			
 
				+        """sort kpts and remove the repeated ones."""
			
 
				+        kpts = sorted(kpts, key=lambda x: x[key])
			
 
				+        num = len(kpts)
			
 
				+        for i in range(num - 1, 0, -1):
			
 
				+            if kpts[i][key] == kpts[i - 1][key]:
			
 
				+                del kpts[i]
			
 
				+
			
 
				+        return kpts
			
--- a/paddlers/models/ppdet/metrics/map_utils.py
+++ b/paddlers/models/ppdet/metrics/map_utils.py
@@ -0,0 +1,444 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import numpy as np
			
 
				+import itertools
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.modeling.bbox_utils import poly2rbox, rbox2poly_np
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = [
			
 
				+    'draw_pr_curve',
			
 
				+    'bbox_area',
			
 
				+    'jaccard_overlap',
			
 
				+    'prune_zero_padding',
			
 
				+    'DetectionMAP',
			
 
				+    'ap_per_class',
			
 
				+    'compute_ap',
			
 
				+]
			
 
				+
			
 
				+
			
 
				+def draw_pr_curve(precision,
			
 
				+                  recall,
			
 
				+                  iou=0.5,
			
 
				+                  out_dir='pr_curve',
			
 
				+                  file_name='precision_recall_curve.jpg'):
			
 
				+    if not os.path.exists(out_dir):
			
 
				+        os.makedirs(out_dir)
			
 
				+    output_path = os.path.join(out_dir, file_name)
			
 
				+    try:
			
 
				+        import matplotlib.pyplot as plt
			
 
				+    except Exception as e:
			
 
				+        logger.error('Matplotlib not found, plaese install matplotlib.'
			
 
				+                     'for example: `pip install matplotlib`.')
			
 
				+        raise e
			
 
				+    plt.cla()
			
 
				+    plt.figure('P-R Curve')
			
 
				+    plt.title('Precision/Recall Curve(IoU={})'.format(iou))
			
 
				+    plt.xlabel('Recall')
			
 
				+    plt.ylabel('Precision')
			
 
				+    plt.grid(True)
			
 
				+    plt.plot(recall, precision)
			
 
				+    plt.savefig(output_path)
			
 
				+
			
 
				+
			
 
				+def bbox_area(bbox, is_bbox_normalized):
			
 
				+    """
			
 
				+    Calculate area of a bounding box
			
 
				+    """
			
 
				+    norm = 1. - float(is_bbox_normalized)
			
 
				+    width = bbox[2] - bbox[0] + norm
			
 
				+    height = bbox[3] - bbox[1] + norm
			
 
				+    return width * height
			
 
				+
			
 
				+
			
 
				+def jaccard_overlap(pred, gt, is_bbox_normalized=False):
			
 
				+    """
			
 
				+    Calculate jaccard overlap ratio between two bounding box
			
 
				+    """
			
 
				+    if pred[0] >= gt[2] or pred[2] <= gt[0] or \
			
 
				+        pred[1] >= gt[3] or pred[3] <= gt[1]:
			
 
				+        return 0.
			
 
				+    inter_xmin = max(pred[0], gt[0])
			
 
				+    inter_ymin = max(pred[1], gt[1])
			
 
				+    inter_xmax = min(pred[2], gt[2])
			
 
				+    inter_ymax = min(pred[3], gt[3])
			
 
				+    inter_size = bbox_area([inter_xmin, inter_ymin, inter_xmax, inter_ymax],
			
 
				+                           is_bbox_normalized)
			
 
				+    pred_size = bbox_area(pred, is_bbox_normalized)
			
 
				+    gt_size = bbox_area(gt, is_bbox_normalized)
			
 
				+    overlap = float(inter_size) / (pred_size + gt_size - inter_size)
			
 
				+    return overlap
			
 
				+
			
 
				+
			
 
				+def calc_rbox_iou(pred, gt_rbox):
			
 
				+    """
			
 
				+    calc iou between rotated bbox
			
 
				+    """
			
 
				+    # calc iou of bounding box for speedup
			
 
				+    pred = np.array(pred, np.float32).reshape(-1, 8)
			
 
				+    pred = pred.reshape(-1, 2)
			
 
				+    gt_poly = rbox2poly_np(np.array(gt_rbox).reshape(-1, 5))[0]
			
 
				+    gt_poly = gt_poly.reshape(-1, 2)
			
 
				+    pred_rect = [
			
 
				+        np.min(pred[:, 0]), np.min(pred[:, 1]), np.max(pred[:, 0]),
			
 
				+        np.max(pred[:, 1])
			
 
				+    ]
			
 
				+    gt_rect = [
			
 
				+        np.min(gt_poly[:, 0]), np.min(gt_poly[:, 1]), np.max(gt_poly[:, 0]),
			
 
				+        np.max(gt_poly[:, 1])
			
 
				+    ]
			
 
				+    iou = jaccard_overlap(pred_rect, gt_rect, False)
			
 
				+
			
 
				+    if iou <= 0:
			
 
				+        return iou
			
 
				+
			
 
				+    # calc rbox iou
			
 
				+    pred = pred.reshape(-1, 8)
			
 
				+
			
 
				+    pred = np.array(pred, np.float32).reshape(-1, 8)
			
 
				+    pred_rbox = poly2rbox(pred)
			
 
				+    pred_rbox = pred_rbox.reshape(-1, 5)
			
 
				+    pred_rbox = pred_rbox.reshape(-1, 5)
			
 
				+    try:
			
 
				+        from rbox_iou_ops import rbox_iou
			
 
				+    except Exception as e:
			
 
				+        print("import custom_ops error, try install rbox_iou_ops " \
			
 
				+                  "following ppdet/ext_op/README.md", e)
			
 
				+        sys.stdout.flush()
			
 
				+        sys.exit(-1)
			
 
				+    gt_rbox = np.array(gt_rbox, np.float32).reshape(-1, 5)
			
 
				+    pd_gt_rbox = paddle.to_tensor(gt_rbox, dtype='float32')
			
 
				+    pd_pred_rbox = paddle.to_tensor(pred_rbox, dtype='float32')
			
 
				+    iou = rbox_iou(pd_gt_rbox, pd_pred_rbox)
			
 
				+    iou = iou.numpy()
			
 
				+    return iou[0][0]
			
 
				+
			
 
				+
			
 
				+def prune_zero_padding(gt_box, gt_label, difficult=None):
			
 
				+    valid_cnt = 0
			
 
				+    for i in range(len(gt_box)):
			
 
				+        if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \
			
 
				+                gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
			
 
				+            break
			
 
				+        valid_cnt += 1
			
 
				+    return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
			
 
				+            if difficult is not None else None)
			
 
				+
			
 
				+
			
 
				+class DetectionMAP(object):
			
 
				+    """
			
 
				+    Calculate detection mean average precision.
			
 
				+    Currently support two types: 11point and integral
			
 
				+
			
 
				+    Args:
			
 
				+        class_num (int): The class number.
			
 
				+        overlap_thresh (float): The threshold of overlap
			
 
				+            ratio between prediction bounding box and
			
 
				+            ground truth bounding box for deciding
			
 
				+            true/false positive. Default 0.5.
			
 
				+        map_type (str): Calculation method of mean average
			
 
				+            precision, currently support '11point' and
			
 
				+            'integral'. Default '11point'.
			
 
				+        is_bbox_normalized (bool): Whether bounding boxes
			
 
				+            is normalized to range[0, 1]. Default False.
			
 
				+        evaluate_difficult (bool): Whether to evaluate
			
 
				+            difficult bounding boxes. Default False.
			
 
				+        catid2name (dict): Mapping between category id and category name.
			
 
				+        classwise (bool): Whether per-category AP and draw
			
 
				+            P-R Curve or not.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 class_num,
			
 
				+                 overlap_thresh=0.5,
			
 
				+                 map_type='11point',
			
 
				+                 is_bbox_normalized=False,
			
 
				+                 evaluate_difficult=False,
			
 
				+                 catid2name=None,
			
 
				+                 classwise=False):
			
 
				+        self.class_num = class_num
			
 
				+        self.overlap_thresh = overlap_thresh
			
 
				+        assert map_type in ['11point', 'integral'], \
			
 
				+                "map_type currently only support '11point' "\
			
 
				+                "and 'integral'"
			
 
				+        self.map_type = map_type
			
 
				+        self.is_bbox_normalized = is_bbox_normalized
			
 
				+        self.evaluate_difficult = evaluate_difficult
			
 
				+        self.classwise = classwise
			
 
				+        self.classes = []
			
 
				+        for cname in catid2name.values():
			
 
				+            self.classes.append(cname)
			
 
				+        self.reset()
			
 
				+
			
 
				+    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
			
 
				+        """
			
 
				+        Update metric statics from given prediction and ground
			
 
				+        truth infomations.
			
 
				+        """
			
 
				+        if difficult is None:
			
 
				+            difficult = np.zeros_like(gt_label)
			
 
				+
			
 
				+        # record class gt count
			
 
				+        for gtl, diff in zip(gt_label, difficult):
			
 
				+            if self.evaluate_difficult or int(diff) == 0:
			
 
				+                self.class_gt_counts[int(np.array(gtl))] += 1
			
 
				+
			
 
				+        # record class score positive
			
 
				+        visited = [False] * len(gt_label)
			
 
				+        for b, s, l in zip(bbox, score, label):
			
 
				+            pred = b.tolist() if isinstance(b, np.ndarray) else b
			
 
				+            max_idx = -1
			
 
				+            max_overlap = -1.0
			
 
				+            for i, gl in enumerate(gt_label):
			
 
				+                if int(gl) == int(l):
			
 
				+                    if len(gt_box[i]) == 5:
			
 
				+                        overlap = calc_rbox_iou(pred, gt_box[i])
			
 
				+                    else:
			
 
				+                        overlap = jaccard_overlap(pred, gt_box[i],
			
 
				+                                                  self.is_bbox_normalized)
			
 
				+                    if overlap > max_overlap:
			
 
				+                        max_overlap = overlap
			
 
				+                        max_idx = i
			
 
				+
			
 
				+            if max_overlap > self.overlap_thresh:
			
 
				+                if self.evaluate_difficult or \
			
 
				+                        int(np.array(difficult[max_idx])) == 0:
			
 
				+                    if not visited[max_idx]:
			
 
				+                        self.class_score_poss[int(l)].append([s, 1.0])
			
 
				+                        visited[max_idx] = True
			
 
				+                    else:
			
 
				+                        self.class_score_poss[int(l)].append([s, 0.0])
			
 
				+            else:
			
 
				+                self.class_score_poss[int(l)].append([s, 0.0])
			
 
				+
			
 
				+    def reset(self):
			
 
				+        """
			
 
				+        Reset metric statics
			
 
				+        """
			
 
				+        self.class_score_poss = [[] for _ in range(self.class_num)]
			
 
				+        self.class_gt_counts = [0] * self.class_num
			
 
				+        self.mAP = 0.0
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        """
			
 
				+        Accumulate metric results and calculate mAP
			
 
				+        """
			
 
				+        mAP = 0.
			
 
				+        valid_cnt = 0
			
 
				+        eval_results = []
			
 
				+        for score_pos, count in zip(self.class_score_poss,
			
 
				+                                    self.class_gt_counts):
			
 
				+            if count == 0: continue
			
 
				+            if len(score_pos) == 0:
			
 
				+                valid_cnt += 1
			
 
				+                continue
			
 
				+
			
 
				+            accum_tp_list, accum_fp_list = \
			
 
				+                    self._get_tp_fp_accum(score_pos)
			
 
				+            precision = []
			
 
				+            recall = []
			
 
				+            for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
			
 
				+                precision.append(float(ac_tp) / (ac_tp + ac_fp))
			
 
				+                recall.append(float(ac_tp) / count)
			
 
				+
			
 
				+            one_class_ap = 0.0
			
 
				+            if self.map_type == '11point':
			
 
				+                max_precisions = [0.] * 11
			
 
				+                start_idx = len(precision) - 1
			
 
				+                for j in range(10, -1, -1):
			
 
				+                    for i in range(start_idx, -1, -1):
			
 
				+                        if recall[i] < float(j) / 10.:
			
 
				+                            start_idx = i
			
 
				+                            if j > 0:
			
 
				+                                max_precisions[j - 1] = max_precisions[j]
			
 
				+                                break
			
 
				+                        else:
			
 
				+                            if max_precisions[j] < precision[i]:
			
 
				+                                max_precisions[j] = precision[i]
			
 
				+                one_class_ap = sum(max_precisions) / 11.
			
 
				+                mAP += one_class_ap
			
 
				+                valid_cnt += 1
			
 
				+            elif self.map_type == 'integral':
			
 
				+                import math
			
 
				+                prev_recall = 0.
			
 
				+                for i in range(len(precision)):
			
 
				+                    recall_gap = math.fabs(recall[i] - prev_recall)
			
 
				+                    if recall_gap > 1e-6:
			
 
				+                        one_class_ap += precision[i] * recall_gap
			
 
				+                        prev_recall = recall[i]
			
 
				+                mAP += one_class_ap
			
 
				+                valid_cnt += 1
			
 
				+            else:
			
 
				+                logger.error("Unspported mAP type {}".format(self.map_type))
			
 
				+                sys.exit(1)
			
 
				+            eval_results.append({
			
 
				+                'class': self.classes[valid_cnt - 1],
			
 
				+                'ap': one_class_ap,
			
 
				+                'precision': precision,
			
 
				+                'recall': recall,
			
 
				+            })
			
 
				+        self.eval_results = eval_results
			
 
				+        self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP
			
 
				+
			
 
				+    def get_map(self):
			
 
				+        """
			
 
				+        Get mAP result
			
 
				+        """
			
 
				+        if self.mAP is None:
			
 
				+            logger.error("mAP is not calculated.")
			
 
				+        if self.classwise:
			
 
				+            # Compute per-category AP and PR curve
			
 
				+            try:
			
 
				+                from terminaltables import AsciiTable
			
 
				+            except Exception as e:
			
 
				+                logger.error(
			
 
				+                    'terminaltables not found, plaese install terminaltables. '
			
 
				+                    'for example: `pip install terminaltables`.')
			
 
				+                raise e
			
 
				+            results_per_category = []
			
 
				+            for eval_result in self.eval_results:
			
 
				+                results_per_category.append(
			
 
				+                    (str(eval_result['class']),
			
 
				+                     '{:0.3f}'.format(float(eval_result['ap']))))
			
 
				+                draw_pr_curve(
			
 
				+                    eval_result['precision'],
			
 
				+                    eval_result['recall'],
			
 
				+                    out_dir='voc_pr_curve',
			
 
				+                    file_name='{}_precision_recall_curve.jpg'.format(
			
 
				+                        eval_result['class']))
			
 
				+
			
 
				+            num_columns = min(6, len(results_per_category) * 2)
			
 
				+            results_flatten = list(itertools.chain(*results_per_category))
			
 
				+            headers = ['category', 'AP'] * (num_columns // 2)
			
 
				+            results_2d = itertools.zip_longest(*[
			
 
				+                results_flatten[i::num_columns] for i in range(num_columns)
			
 
				+            ])
			
 
				+            table_data = [headers]
			
 
				+            table_data += [result for result in results_2d]
			
 
				+            table = AsciiTable(table_data)
			
 
				+            logger.info('Per-category of VOC AP: \n{}'.format(table.table))
			
 
				+            logger.info(
			
 
				+                "per-category PR curve has output to voc_pr_curve folder.")
			
 
				+        return self.mAP
			
 
				+
			
 
				+    def _get_tp_fp_accum(self, score_pos_list):
			
 
				+        """
			
 
				+        Calculate accumulating true/false positive results from
			
 
				+        [score, pos] records
			
 
				+        """
			
 
				+        sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True)
			
 
				+        accum_tp = 0
			
 
				+        accum_fp = 0
			
 
				+        accum_tp_list = []
			
 
				+        accum_fp_list = []
			
 
				+        for (score, pos) in sorted_list:
			
 
				+            accum_tp += int(pos)
			
 
				+            accum_tp_list.append(accum_tp)
			
 
				+            accum_fp += 1 - int(pos)
			
 
				+            accum_fp_list.append(accum_fp)
			
 
				+        return accum_tp_list, accum_fp_list
			
 
				+
			
 
				+
			
 
				+def ap_per_class(tp, conf, pred_cls, target_cls):
			
 
				+    """
			
 
				+    Computes the average precision, given the recall and precision curves.
			
 
				+    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
			
 
				+
			
 
				+    Args:
			
 
				+        tp (list): True positives.
			
 
				+        conf (list): Objectness value from 0-1.
			
 
				+        pred_cls (list): Predicted object classes.
			
 
				+        target_cls (list): Target object classes.
			
 
				+    """
			
 
				+    tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(
			
 
				+        pred_cls), np.array(target_cls)
			
 
				+
			
 
				+    # Sort by objectness
			
 
				+    i = np.argsort(-conf)
			
 
				+    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
			
 
				+
			
 
				+    # Find unique classes
			
 
				+    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
			
 
				+
			
 
				+    # Create Precision-Recall curve and compute AP for each class
			
 
				+    ap, p, r = [], [], []
			
 
				+    for c in unique_classes:
			
 
				+        i = pred_cls == c
			
 
				+        n_gt = sum(target_cls == c)  # Number of ground truth objects
			
 
				+        n_p = sum(i)  # Number of predicted objects
			
 
				+
			
 
				+        if (n_p == 0) and (n_gt == 0):
			
 
				+            continue
			
 
				+        elif (n_p == 0) or (n_gt == 0):
			
 
				+            ap.append(0)
			
 
				+            r.append(0)
			
 
				+            p.append(0)
			
 
				+        else:
			
 
				+            # Accumulate FPs and TPs
			
 
				+            fpc = np.cumsum(1 - tp[i])
			
 
				+            tpc = np.cumsum(tp[i])
			
 
				+
			
 
				+            # Recall
			
 
				+            recall_curve = tpc / (n_gt + 1e-16)
			
 
				+            r.append(tpc[-1] / (n_gt + 1e-16))
			
 
				+
			
 
				+            # Precision
			
 
				+            precision_curve = tpc / (tpc + fpc)
			
 
				+            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
			
 
				+
			
 
				+            # AP from recall-precision curve
			
 
				+            ap.append(compute_ap(recall_curve, precision_curve))
			
 
				+
			
 
				+    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
			
 
				+        p)
			
 
				+
			
 
				+
			
 
				+def compute_ap(recall, precision):
			
 
				+    """
			
 
				+    Computes the average precision, given the recall and precision curves.
			
 
				+    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
			
 
				+
			
 
				+    Args:
			
 
				+        recall (list): The recall curve.
			
 
				+        precision (list): The precision curve.
			
 
				+
			
 
				+    Returns:
			
 
				+        The average precision as computed in py-faster-rcnn.
			
 
				+    """
			
 
				+    # correct AP calculation
			
 
				+    # first append sentinel values at the end
			
 
				+    mrec = np.concatenate(([0.], recall, [1.]))
			
 
				+    mpre = np.concatenate(([0.], precision, [0.]))
			
 
				+
			
 
				+    # compute the precision envelope
			
 
				+    for i in range(mpre.size - 1, 0, -1):
			
 
				+        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
			
 
				+
			
 
				+    # to calculate area under PR curve, look for points
			
 
				+    # where X axis (recall) changes value
			
 
				+    i = np.where(mrec[1:] != mrec[:-1])[0]
			
 
				+
			
 
				+    # and sum (\Delta recall) * prec
			
 
				+    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
			
 
				+    return ap
			
--- a/paddlers/models/ppdet/metrics/mcmot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mcmot_metrics.py
@@ -0,0 +1,470 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import copy
			
 
				+import sys
			
 
				+import math
			
 
				+from collections import defaultdict
			
 
				+from motmetrics.math_util import quiet_divide
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn.functional as F
			
 
				+from .metrics import Metric
			
 
				+import motmetrics as mm
			
 
				+import openpyxl
			
 
				+metrics = mm.metrics.motchallenge_metrics
			
 
				+mh = mm.metrics.create()
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
# Names exported by this module.
__all__ = ['MCMOTEvaluator', 'MCMOTMetric']

# Metric names requested from motmetrics when a summary is computed
# (passed as `metrics` to MCMOTEvaluator.get_summary by parse_accs_metrics).
METRICS_LIST = [
    'num_frames', 'num_matches', 'num_switches', 'num_transfer', 'num_ascend',
    'num_migrate', 'num_false_positives', 'num_misses', 'num_detections',
    'num_objects', 'num_predictions', 'num_unique_objects', 'mostly_tracked',
    'partially_tracked', 'mostly_lost', 'num_fragmentations', 'motp', 'mota',
    'precision', 'recall', 'idfp', 'idfn', 'idtp', 'idp', 'idr', 'idf1'
]

# Display name for each metric column; handed to mm.io.render_summary as
# `namemap` when a summary is printed.
NAME_MAP = {
    'num_frames': 'num_frames',
    'num_matches': 'num_matches',
    'num_switches': 'IDs',
    'num_transfer': 'IDt',
    'num_ascend': 'IDa',
    'num_migrate': 'IDm',
    'num_false_positives': 'FP',
    'num_misses': 'FN',
    'num_detections': 'num_detections',
    'num_objects': 'num_objects',
    'num_predictions': 'num_predictions',
    'num_unique_objects': 'GT',
    'mostly_tracked': 'MT',
    'partially_tracked': 'partially_tracked',
    'mostly_lost': 'ML',
    'num_fragmentations': 'FM',
    'motp': 'MOTP',
    'mota': 'MOTA',
    'precision': 'Prcn',
    'recall': 'Rcll',
    'idfp': 'idfp',
    'idfn': 'idfn',
    'idtp': 'idtp',
    'idp': 'IDP',
    'idr': 'IDR',
    'idf1': 'IDF1'
}
			
 
				+
			
 
				+
			
 
				+def parse_accs_metrics(seq_acc, index_name, verbose=False):
			
 
				+    """
			
 
				+    Parse the evaluation indicators of multiple MOTAccumulator
			
 
				+    """
			
 
				+    mh = mm.metrics.create()
			
 
				+    summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
			
 
				+    summary.loc['OVERALL', 'motp'] = (summary['motp'] * summary['num_detections']).sum() / \
			
 
				+                                     summary.loc['OVERALL', 'num_detections']
			
 
				+    if verbose:
			
 
				+        strsummary = mm.io.render_summary(
			
 
				+            summary, formatters=mh.formatters, namemap=NAME_MAP)
			
 
				+        print(strsummary)
			
 
				+
			
 
				+    return summary
			
 
				+
			
 
				+
			
 
				+def seqs_overall_metrics(summary_df, verbose=False):
			
 
				+    """
			
 
				+    Calculate overall metrics for multiple sequences
			
 
				+    """
			
 
				+    add_col = [
			
 
				+        'num_frames', 'num_matches', 'num_switches', 'num_transfer',
			
 
				+        'num_ascend', 'num_migrate', 'num_false_positives', 'num_misses',
			
 
				+        'num_detections', 'num_objects', 'num_predictions',
			
 
				+        'num_unique_objects', 'mostly_tracked', 'partially_tracked',
			
 
				+        'mostly_lost', 'num_fragmentations', 'idfp', 'idfn', 'idtp'
			
 
				+    ]
			
 
				+    calc_col = ['motp', 'mota', 'precision', 'recall', 'idp', 'idr', 'idf1']
			
 
				+    calc_df = summary_df.copy()
			
 
				+
			
 
				+    overall_dic = {}
			
 
				+    for col in add_col:
			
 
				+        overall_dic[col] = calc_df[col].sum()
			
 
				+
			
 
				+    for col in calc_col:
			
 
				+        overall_dic[col] = getattr(MCMOTMetricOverall, col + '_overall')(
			
 
				+            calc_df, overall_dic)
			
 
				+
			
 
				+    overall_df = pd.DataFrame(overall_dic, index=['overall_calc'])
			
 
				+    calc_df = pd.concat([calc_df, overall_df])
			
 
				+
			
 
				+    if verbose:
			
 
				+        mh = mm.metrics.create()
			
 
				+        str_calc_df = mm.io.render_summary(
			
 
				+            calc_df, formatters=mh.formatters, namemap=NAME_MAP)
			
 
				+        print(str_calc_df)
			
 
				+
			
 
				+    return calc_df
			
 
				+
			
 
				+
			
 
				+class MCMOTMetricOverall(object):
			
 
				+    def motp_overall(summary_df, overall_dic):
			
 
				+        motp = quiet_divide((summary_df['motp'] *
			
 
				+                             summary_df['num_detections']).sum(),
			
 
				+                            overall_dic['num_detections'])
			
 
				+        return motp
			
 
				+
			
 
				+    def mota_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        mota = 1. - quiet_divide(
			
 
				+            (overall_dic['num_misses'] + overall_dic['num_switches'] +
			
 
				+             overall_dic['num_false_positives']), overall_dic['num_objects'])
			
 
				+        return mota
			
 
				+
			
 
				+    def precision_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        precision = quiet_divide(overall_dic['num_detections'], (
			
 
				+            overall_dic['num_false_positives'] + overall_dic['num_detections']
			
 
				+        ))
			
 
				+        return precision
			
 
				+
			
 
				+    def recall_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        recall = quiet_divide(overall_dic['num_detections'],
			
 
				+                              overall_dic['num_objects'])
			
 
				+        return recall
			
 
				+
			
 
				+    def idp_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        idp = quiet_divide(overall_dic['idtp'],
			
 
				+                           (overall_dic['idtp'] + overall_dic['idfp']))
			
 
				+        return idp
			
 
				+
			
 
				+    def idr_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        idr = quiet_divide(overall_dic['idtp'],
			
 
				+                           (overall_dic['idtp'] + overall_dic['idfn']))
			
 
				+        return idr
			
 
				+
			
 
				+    def idf1_overall(summary_df, overall_dic):
			
 
				+        del summary_df
			
 
				+        idf1 = quiet_divide(2. * overall_dic['idtp'], (
			
 
				+            overall_dic['num_objects'] + overall_dic['num_predictions']))
			
 
				+        return idf1
			
 
				+
			
 
				+
			
 
				+def read_mcmot_results_union(filename, is_gt, is_ignore):
			
 
				+    results_dict = dict()
			
 
				+    if os.path.isfile(filename):
			
 
				+        all_result = np.loadtxt(filename, delimiter=',')
			
 
				+        if all_result.shape[0] == 0 or all_result.shape[1] < 7:
			
 
				+            return results_dict
			
 
				+        if is_ignore:
			
 
				+            return results_dict
			
 
				+        if is_gt:
			
 
				+            # only for test use
			
 
				+            all_result = all_result[all_result[:, 7] != 0]
			
 
				+            all_result[:, 7] = all_result[:, 7] - 1
			
 
				+
			
 
				+        if all_result.shape[0] == 0:
			
 
				+            return results_dict
			
 
				+
			
 
				+        class_unique = np.unique(all_result[:, 7])
			
 
				+
			
 
				+        last_max_id = 0
			
 
				+        result_cls_list = []
			
 
				+        for cls in class_unique:
			
 
				+            result_cls_split = all_result[all_result[:, 7] == cls]
			
 
				+            result_cls_split[:, 1] = result_cls_split[:, 1] + last_max_id
			
 
				+            # make sure track id different between every category
			
 
				+            last_max_id = max(np.unique(result_cls_split[:, 1])) + 1
			
 
				+            result_cls_list.append(result_cls_split)
			
 
				+
			
 
				+        results_con = np.concatenate(result_cls_list)
			
 
				+
			
 
				+        for line in range(len(results_con)):
			
 
				+            linelist = results_con[line]
			
 
				+            fid = int(linelist[0])
			
 
				+            if fid < 1:
			
 
				+                continue
			
 
				+            results_dict.setdefault(fid, list())
			
 
				+
			
 
				+            if is_gt:
			
 
				+                score = 1
			
 
				+            else:
			
 
				+                score = float(linelist[6])
			
 
				+
			
 
				+            tlwh = tuple(map(float, linelist[2:6]))
			
 
				+            target_id = int(linelist[1])
			
 
				+            cls = int(linelist[7])
			
 
				+
			
 
				+            results_dict[fid].append((tlwh, target_id, cls, score))
			
 
				+
			
 
				+        return results_dict
			
 
				+
			
 
				+
			
 
				+def read_mcmot_results(filename, is_gt, is_ignore):
			
 
				+    results_dict = dict()
			
 
				+    if os.path.isfile(filename):
			
 
				+        with open(filename, 'r') as f:
			
 
				+            for line in f.readlines():
			
 
				+                linelist = line.strip().split(',')
			
 
				+                if len(linelist) < 7:
			
 
				+                    continue
			
 
				+                fid = int(linelist[0])
			
 
				+                if fid < 1:
			
 
				+                    continue
			
 
				+                cid = int(linelist[7])
			
 
				+                if is_gt:
			
 
				+                    score = 1
			
 
				+                    # only for test use
			
 
				+                    cid -= 1
			
 
				+                else:
			
 
				+                    score = float(linelist[6])
			
 
				+
			
 
				+                cls_result_dict = results_dict.setdefault(cid, dict())
			
 
				+                cls_result_dict.setdefault(fid, list())
			
 
				+
			
 
				+                tlwh = tuple(map(float, linelist[2:6]))
			
 
				+                target_id = int(linelist[1])
			
 
				+                cls_result_dict[fid].append((tlwh, target_id, score))
			
 
				+    return results_dict
			
 
				+
			
 
				+
			
 
				+def read_results(filename,
			
 
				+                 data_type,
			
 
				+                 is_gt=False,
			
 
				+                 is_ignore=False,
			
 
				+                 multi_class=False,
			
 
				+                 union=False):
			
 
				+    if data_type in ['mcmot', 'lab']:
			
 
				+        if multi_class:
			
 
				+            if union:
			
 
				+                # The results are evaluated by union all the categories.
			
 
				+                # Track IDs between different categories cannot be duplicate.
			
 
				+                read_fun = read_mcmot_results_union
			
 
				+            else:
			
 
				+                # The results are evaluated separately by category.
			
 
				+                read_fun = read_mcmot_results
			
 
				+        else:
			
 
				+            raise ValueError('multi_class: {}, MCMOT should have cls_id.'.
			
 
				+                             format(multi_class))
			
 
				+    else:
			
 
				+        raise ValueError('Unknown data type: {}'.format(data_type))
			
 
				+
			
 
				+    return read_fun(filename, is_gt, is_ignore)
			
 
				+
			
 
				+
			
 
				+def unzip_objs(objs):
			
 
				+    if len(objs) > 0:
			
 
				+        tlwhs, ids, scores = zip(*objs)
			
 
				+    else:
			
 
				+        tlwhs, ids, scores = [], [], []
			
 
				+    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
			
 
				+    return tlwhs, ids, scores
			
 
				+
			
 
				+
			
 
				+def unzip_objs_cls(objs):
			
 
				+    if len(objs) > 0:
			
 
				+        tlwhs, ids, cls, scores = zip(*objs)
			
 
				+    else:
			
 
				+        tlwhs, ids, cls, scores = [], [], [], []
			
 
				+    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
			
 
				+    ids = np.array(ids)
			
 
				+    cls = np.array(cls)
			
 
				+    scores = np.array(scores)
			
 
				+    return tlwhs, ids, cls, scores
			
 
				+
			
 
				+
			
 
				+class MCMOTEvaluator(object):
			
 
				+    def __init__(self, data_root, seq_name, data_type, num_classes):
			
 
				+        self.data_root = data_root
			
 
				+        self.seq_name = seq_name
			
 
				+        self.data_type = data_type
			
 
				+        self.num_classes = num_classes
			
 
				+
			
 
				+        self.load_annotations()
			
 
				+        self.reset_accumulator()
			
 
				+
			
 
				+        self.class_accs = []
			
 
				+
			
 
				+    def load_annotations(self):
			
 
				+        assert self.data_type == 'mcmot'
			
 
				+        self.gt_filename = os.path.join(self.data_root, '../', '../',
			
 
				+                                        'sequences',
			
 
				+                                        '{}.txt'.format(self.seq_name))
			
 
				+
			
 
				+    def reset_accumulator(self):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+        self.acc = mm.MOTAccumulator(auto_id=True)
			
 
				+
			
 
				+    def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False,
			
 
				+                        union=False):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+        if union:
			
 
				+            trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
			
 
				+            gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
			
 
				+
			
 
				+            # get distance matrix
			
 
				+            iou_distance = mm.distances.iou_matrix(
			
 
				+                gt_tlwhs, trk_tlwhs, max_iou=0.5)
			
 
				+
			
 
				+            # Set the distance between objects of different categories to nan
			
 
				+            gt_cls_len = len(gt_cls)
			
 
				+            trk_cls_len = len(trk_cls)
			
 
				+            # When the number of GT or Trk is 0, iou_distance dimension is (0,0)
			
 
				+            if gt_cls_len != 0 and trk_cls_len != 0:
			
 
				+                gt_cls = gt_cls.reshape(gt_cls_len, 1)
			
 
				+                gt_cls = np.repeat(gt_cls, trk_cls_len, axis=1)
			
 
				+                trk_cls = trk_cls.reshape(1, trk_cls_len)
			
 
				+                trk_cls = np.repeat(trk_cls, gt_cls_len, axis=0)
			
 
				+                iou_distance = np.where(gt_cls == trk_cls, iou_distance,
			
 
				+                                        np.nan)
			
 
				+
			
 
				+        else:
			
 
				+            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
			
 
				+            gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
			
 
				+
			
 
				+            # get distance matrix
			
 
				+            iou_distance = mm.distances.iou_matrix(
			
 
				+                gt_tlwhs, trk_tlwhs, max_iou=0.5)
			
 
				+
			
 
				+        self.acc.update(gt_ids, trk_ids, iou_distance)
			
 
				+
			
 
				+        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
			
 
				+                                                            'mot_events'):
			
 
				+            events = self.acc.mot_events  # only supported by https://github.com/longcw/py-motmetrics
			
 
				+        else:
			
 
				+            events = None
			
 
				+        return events
			
 
				+
			
 
				+    def eval_file(self, result_filename):
			
 
				+        # evaluation of each category
			
 
				+        gt_frame_dict = read_results(
			
 
				+            self.gt_filename,
			
 
				+            self.data_type,
			
 
				+            is_gt=True,
			
 
				+            multi_class=True,
			
 
				+            union=False)
			
 
				+        result_frame_dict = read_results(
			
 
				+            result_filename,
			
 
				+            self.data_type,
			
 
				+            is_gt=False,
			
 
				+            multi_class=True,
			
 
				+            union=False)
			
 
				+
			
 
				+        for cid in range(self.num_classes):
			
 
				+            self.reset_accumulator()
			
 
				+            cls_result_frame_dict = result_frame_dict.setdefault(cid, dict())
			
 
				+            cls_gt_frame_dict = gt_frame_dict.setdefault(cid, dict())
			
 
				+
			
 
				+            # only labeled frames will be evaluated
			
 
				+            frames = sorted(list(set(cls_gt_frame_dict.keys())))
			
 
				+
			
 
				+            for frame_id in frames:
			
 
				+                trk_objs = cls_result_frame_dict.get(frame_id, [])
			
 
				+                gt_objs = cls_gt_frame_dict.get(frame_id, [])
			
 
				+                self.eval_frame_dict(trk_objs, gt_objs, rtn_events=False)
			
 
				+
			
 
				+            self.class_accs.append(self.acc)
			
 
				+
			
 
				+        return self.class_accs
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def get_summary(accs,
			
 
				+                    names,
			
 
				+                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
			
 
				+                             'precision', 'recall')):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+
			
 
				+        names = copy.deepcopy(names)
			
 
				+        if metrics is None:
			
 
				+            metrics = mm.metrics.motchallenge_metrics
			
 
				+        metrics = copy.deepcopy(metrics)
			
 
				+
			
 
				+        mh = mm.metrics.create()
			
 
				+        summary = mh.compute_many(
			
 
				+            accs, metrics=metrics, names=names, generate_overall=True)
			
 
				+
			
 
				+        return summary
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def save_summary(summary, filename):
			
 
				+        import pandas as pd
			
 
				+        writer = pd.ExcelWriter(filename)
			
 
				+        summary.to_excel(writer)
			
 
				+        writer.save()
			
 
				+
			
 
				+
			
 
				+class MCMOTMetric(Metric):
			
 
				+    def __init__(self, num_classes, save_summary=False):
			
 
				+        self.num_classes = num_classes
			
 
				+        self.save_summary = save_summary
			
 
				+        self.MCMOTEvaluator = MCMOTEvaluator
			
 
				+        self.result_root = None
			
 
				+        self.reset()
			
 
				+
			
 
				+        self.seqs_overall = defaultdict(list)
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.accs = []
			
 
				+        self.seqs = []
			
 
				+
			
 
				+    def update(self, data_root, seq, data_type, result_root, result_filename):
			
 
				+        evaluator = self.MCMOTEvaluator(data_root, seq, data_type,
			
 
				+                                        self.num_classes)
			
 
				+        seq_acc = evaluator.eval_file(result_filename)
			
 
				+        self.accs.append(seq_acc)
			
 
				+        self.seqs.append(seq)
			
 
				+        self.result_root = result_root
			
 
				+
			
 
				+        cls_index_name = [
			
 
				+            '{}_{}'.format(seq, i) for i in range(self.num_classes)
			
 
				+        ]
			
 
				+        summary = parse_accs_metrics(seq_acc, cls_index_name)
			
 
				+        summary.rename(
			
 
				+            index={'OVERALL': '{}_OVERALL'.format(seq)}, inplace=True)
			
 
				+        for row in range(len(summary)):
			
 
				+            self.seqs_overall[row].append(summary.iloc[row:row + 1])
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        self.cls_summary_list = []
			
 
				+        for row in range(self.num_classes):
			
 
				+            seqs_cls_df = pd.concat(self.seqs_overall[row])
			
 
				+            seqs_cls_summary = seqs_overall_metrics(seqs_cls_df)
			
 
				+            cls_summary_overall = seqs_cls_summary.iloc[-1:].copy()
			
 
				+            cls_summary_overall.rename(
			
 
				+                index={'overall_calc': 'overall_calc_{}'.format(row)},
			
 
				+                inplace=True)
			
 
				+            self.cls_summary_list.append(cls_summary_overall)
			
 
				+
			
 
				+    def log(self):
			
 
				+        seqs_summary = seqs_overall_metrics(
			
 
				+            pd.concat(self.seqs_overall[self.num_classes]), verbose=True)
			
 
				+        class_summary = seqs_overall_metrics(
			
 
				+            pd.concat(self.cls_summary_list), verbose=True)
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return 1
			
--- a/paddlers/models/ppdet/metrics/metrics.py
+++ b/paddlers/models/ppdet/metrics/metrics.py
@@ -0,0 +1,434 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import json
			
 
				+import paddle
			
 
				+import numpy as np
			
 
				+import typing
			
 
				+
			
 
				+from .map_utils import prune_zero_padding, DetectionMAP
			
 
				+from .coco_utils import get_infer_results, cocoapi_eval
			
 
				+from .widerface_utils import face_eval_run
			
 
				+from paddlers.models.ppdet.data.source.category import get_categories
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
# Module-level logger named after this module.
logger = setup_logger(__name__)

# Names exported by a star-import of this module.
__all__ = [
    'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric',
    'get_infer_results', 'RBoxMetric', 'SNIPERCOCOMetric'
]

# Sigma table with 17 entries, each divided by 10. COCOMetric.accumulate
# passes it to ``cocoapi_eval`` for keypoint evaluation unless the IoU type is
# 'keypoints_crowd'.
COCO_SIGMAS = np.array([
    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87,
    .87, .89, .89
]) / 10.0
# Sigma table with 14 entries, each divided by 10. COCOMetric.accumulate uses
# it in place of COCO_SIGMAS when the IoU type is 'keypoints_crowd'.
CROWD_SIGMAS = np.array(
    [.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79,
     .79]) / 10.0
			
 
				+
			
 
				+
			
 
				+class Metric(paddle.metric.Metric):
			
 
				+    def name(self):
			
 
				+        return self.__class__.__name__
			
 
				+
			
 
				+    def reset(self):
			
 
				+        pass
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        pass
			
 
				+
			
 
				+    # paddle.metric.Metric defined :metch:`update`, :meth:`accumulate`
			
 
				+    # :metch:`reset`, in ppdet, we also need following 2 methods:
			
 
				+
			
 
				+    # abstract method for logging metric results
			
 
				+    def log(self):
			
 
				+        pass
			
 
				+
			
 
				+    # abstract method for getting metric results
			
 
				+    def get_results(self):
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+class COCOMetric(Metric):
			
 
				+    def __init__(self, anno_file, **kwargs):
			
 
				+        assert os.path.isfile(anno_file), \
			
 
				+                "anno_file {} not a file".format(anno_file)
			
 
				+        self.anno_file = anno_file
			
 
				+        self.clsid2catid = kwargs.get('clsid2catid', None)
			
 
				+        if self.clsid2catid is None:
			
 
				+            self.clsid2catid, _ = get_categories('COCO', anno_file)
			
 
				+        self.classwise = kwargs.get('classwise', False)
			
 
				+        self.output_eval = kwargs.get('output_eval', None)
			
 
				+        # TODO: bias should be unified
			
 
				+        self.bias = kwargs.get('bias', 0)
			
 
				+        self.save_prediction_only = kwargs.get('save_prediction_only', False)
			
 
				+        self.iou_type = kwargs.get('IouType', 'bbox')
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        # only bbox and mask evaluation support currently
			
 
				+        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
			
 
				+        self.eval_results = {}
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        outs = {}
			
 
				+        # outputs Tensor -> numpy.ndarray
			
 
				+        for k, v in outputs.items():
			
 
				+            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
			
 
				+
			
 
				+        # multi-scale inputs: all inputs have same im_id
			
 
				+        if isinstance(inputs, typing.Sequence):
			
 
				+            im_id = inputs[0]['im_id']
			
 
				+        else:
			
 
				+            im_id = inputs['im_id']
			
 
				+        outs['im_id'] = im_id.numpy() if isinstance(im_id,
			
 
				+                                                    paddle.Tensor) else im_id
			
 
				+
			
 
				+        infer_results = get_infer_results(
			
 
				+            outs, self.clsid2catid, bias=self.bias)
			
 
				+        self.results['bbox'] += infer_results[
			
 
				+            'bbox'] if 'bbox' in infer_results else []
			
 
				+        self.results['mask'] += infer_results[
			
 
				+            'mask'] if 'mask' in infer_results else []
			
 
				+        self.results['segm'] += infer_results[
			
 
				+            'segm'] if 'segm' in infer_results else []
			
 
				+        self.results['keypoint'] += infer_results[
			
 
				+            'keypoint'] if 'keypoint' in infer_results else []
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        if len(self.results['bbox']) > 0:
			
 
				+            output = "bbox.json"
			
 
				+            if self.output_eval:
			
 
				+                output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.results['bbox'], f)
			
 
				+                logger.info('The bbox result is saved to bbox.json.')
			
 
				+
			
 
				+            if self.save_prediction_only:
			
 
				+                logger.info('The bbox result is saved to {} and do not '
			
 
				+                            'evaluate the mAP.'.format(output))
			
 
				+            else:
			
 
				+                bbox_stats = cocoapi_eval(
			
 
				+                    output,
			
 
				+                    'bbox',
			
 
				+                    anno_file=self.anno_file,
			
 
				+                    classwise=self.classwise)
			
 
				+                self.eval_results['bbox'] = bbox_stats
			
 
				+                sys.stdout.flush()
			
 
				+
			
 
				+        if len(self.results['mask']) > 0:
			
 
				+            output = "mask.json"
			
 
				+            if self.output_eval:
			
 
				+                output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.results['mask'], f)
			
 
				+                logger.info('The mask result is saved to mask.json.')
			
 
				+
			
 
				+            if self.save_prediction_only:
			
 
				+                logger.info('The mask result is saved to {} and do not '
			
 
				+                            'evaluate the mAP.'.format(output))
			
 
				+            else:
			
 
				+                seg_stats = cocoapi_eval(
			
 
				+                    output,
			
 
				+                    'segm',
			
 
				+                    anno_file=self.anno_file,
			
 
				+                    classwise=self.classwise)
			
 
				+                self.eval_results['mask'] = seg_stats
			
 
				+                sys.stdout.flush()
			
 
				+
			
 
				+        if len(self.results['segm']) > 0:
			
 
				+            output = "segm.json"
			
 
				+            if self.output_eval:
			
 
				+                output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.results['segm'], f)
			
 
				+                logger.info('The segm result is saved to segm.json.')
			
 
				+
			
 
				+            if self.save_prediction_only:
			
 
				+                logger.info('The segm result is saved to {} and do not '
			
 
				+                            'evaluate the mAP.'.format(output))
			
 
				+            else:
			
 
				+                seg_stats = cocoapi_eval(
			
 
				+                    output,
			
 
				+                    'segm',
			
 
				+                    anno_file=self.anno_file,
			
 
				+                    classwise=self.classwise)
			
 
				+                self.eval_results['mask'] = seg_stats
			
 
				+                sys.stdout.flush()
			
 
				+
			
 
				+        if len(self.results['keypoint']) > 0:
			
 
				+            output = "keypoint.json"
			
 
				+            if self.output_eval:
			
 
				+                output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.results['keypoint'], f)
			
 
				+                logger.info('The keypoint result is saved to keypoint.json.')
			
 
				+
			
 
				+            if self.save_prediction_only:
			
 
				+                logger.info('The keypoint result is saved to {} and do not '
			
 
				+                            'evaluate the mAP.'.format(output))
			
 
				+            else:
			
 
				+                style = 'keypoints'
			
 
				+                use_area = True
			
 
				+                sigmas = COCO_SIGMAS
			
 
				+                if self.iou_type == 'keypoints_crowd':
			
 
				+                    style = 'keypoints_crowd'
			
 
				+                    use_area = False
			
 
				+                    sigmas = CROWD_SIGMAS
			
 
				+                keypoint_stats = cocoapi_eval(
			
 
				+                    output,
			
 
				+                    style,
			
 
				+                    anno_file=self.anno_file,
			
 
				+                    classwise=self.classwise,
			
 
				+                    sigmas=sigmas,
			
 
				+                    use_area=use_area)
			
 
				+                self.eval_results['keypoint'] = keypoint_stats
			
 
				+                sys.stdout.flush()
			
 
				+
			
 
				+    def log(self):
			
 
				+        pass
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.eval_results
			
 
				+
			
 
				+
			
 
				+class VOCMetric(Metric):
			
 
				+    def __init__(self,
			
 
				+                 label_list,
			
 
				+                 class_num=20,
			
 
				+                 overlap_thresh=0.5,
			
 
				+                 map_type='11point',
			
 
				+                 is_bbox_normalized=False,
			
 
				+                 evaluate_difficult=False,
			
 
				+                 classwise=False):
			
 
				+        assert os.path.isfile(label_list), \
			
 
				+                "label_list {} not a file".format(label_list)
			
 
				+        self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
			
 
				+
			
 
				+        self.overlap_thresh = overlap_thresh
			
 
				+        self.map_type = map_type
			
 
				+        self.evaluate_difficult = evaluate_difficult
			
 
				+        self.detection_map = DetectionMAP(
			
 
				+            class_num=class_num,
			
 
				+            overlap_thresh=overlap_thresh,
			
 
				+            map_type=map_type,
			
 
				+            is_bbox_normalized=is_bbox_normalized,
			
 
				+            evaluate_difficult=evaluate_difficult,
			
 
				+            catid2name=self.catid2name,
			
 
				+            classwise=classwise)
			
 
				+
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.detection_map.reset()
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        bbox_np = outputs['bbox'].numpy()
			
 
				+        bboxes = bbox_np[:, 2:]
			
 
				+        scores = bbox_np[:, 1]
			
 
				+        labels = bbox_np[:, 0]
			
 
				+        bbox_lengths = outputs['bbox_num'].numpy()
			
 
				+
			
 
				+        if bboxes.shape == (1, 1) or bboxes is None:
			
 
				+            return
			
 
				+        gt_boxes = inputs['gt_bbox']
			
 
				+        gt_labels = inputs['gt_class']
			
 
				+        difficults = inputs['difficult'] if not self.evaluate_difficult \
			
 
				+                            else None
			
 
				+
			
 
				+        scale_factor = inputs['scale_factor'].numpy(
			
 
				+        ) if 'scale_factor' in inputs else np.ones(
			
 
				+            (gt_boxes.shape[0], 2)).astype('float32')
			
 
				+
			
 
				+        bbox_idx = 0
			
 
				+        for i in range(len(gt_boxes)):
			
 
				+            gt_box = gt_boxes[i].numpy()
			
 
				+            h, w = scale_factor[i]
			
 
				+            gt_box = gt_box / np.array([w, h, w, h])
			
 
				+            gt_label = gt_labels[i].numpy()
			
 
				+            difficult = None if difficults is None \
			
 
				+                            else difficults[i].numpy()
			
 
				+            bbox_num = bbox_lengths[i]
			
 
				+            bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
			
 
				+            score = scores[bbox_idx:bbox_idx + bbox_num]
			
 
				+            label = labels[bbox_idx:bbox_idx + bbox_num]
			
 
				+            gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
			
 
				+                                                             difficult)
			
 
				+            self.detection_map.update(bbox, score, label, gt_box, gt_label,
			
 
				+                                      difficult)
			
 
				+            bbox_idx += bbox_num
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        logger.info("Accumulating evaluatation results...")
			
 
				+        self.detection_map.accumulate()
			
 
				+
			
 
				+    def log(self):
			
 
				+        map_stat = 100. * self.detection_map.get_map()
			
 
				+        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
			
 
				+            self.overlap_thresh, self.map_type, map_stat))
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return {'bbox': [self.detection_map.get_map()]}
			
 
				+
			
 
				+
			
 
				+class WiderFaceMetric(Metric):
			
 
				+    def __init__(self, image_dir, anno_file, multi_scale=True):
			
 
				+        self.image_dir = image_dir
			
 
				+        self.anno_file = anno_file
			
 
				+        self.multi_scale = multi_scale
			
 
				+        self.clsid2catid, self.catid2name = get_categories('widerface')
			
 
				+
			
 
				+    def update(self, model):
			
 
				+
			
 
				+        face_eval_run(
			
 
				+            model,
			
 
				+            self.image_dir,
			
 
				+            self.anno_file,
			
 
				+            pred_dir='output/pred',
			
 
				+            eval_mode='widerface',
			
 
				+            multi_scale=self.multi_scale)
			
 
				+
			
 
				+
			
 
				+class RBoxMetric(Metric):
			
 
				+    def __init__(self, anno_file, **kwargs):
			
 
				+        assert os.path.isfile(anno_file), \
			
 
				+                "anno_file {} not a file".format(anno_file)
			
 
				+        assert os.path.exists(anno_file), "anno_file {} not exists".format(
			
 
				+            anno_file)
			
 
				+        self.anno_file = anno_file
			
 
				+        self.gt_anno = json.load(open(self.anno_file))
			
 
				+        cats = self.gt_anno['categories']
			
 
				+        self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
			
 
				+        self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
			
 
				+        self.catid2name = {cat['id']: cat['name'] for cat in cats}
			
 
				+        self.classwise = kwargs.get('classwise', False)
			
 
				+        self.output_eval = kwargs.get('output_eval', None)
			
 
				+        # TODO: bias should be unified
			
 
				+        self.bias = kwargs.get('bias', 0)
			
 
				+        self.save_prediction_only = kwargs.get('save_prediction_only', False)
			
 
				+        self.iou_type = kwargs.get('IouType', 'bbox')
			
 
				+        self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
			
 
				+        self.map_type = kwargs.get('map_type', '11point')
			
 
				+        self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
			
 
				+        class_num = len(self.catid2name)
			
 
				+        self.detection_map = DetectionMAP(
			
 
				+            class_num=class_num,
			
 
				+            overlap_thresh=self.overlap_thresh,
			
 
				+            map_type=self.map_type,
			
 
				+            is_bbox_normalized=False,
			
 
				+            evaluate_difficult=self.evaluate_difficult,
			
 
				+            catid2name=self.catid2name,
			
 
				+            classwise=self.classwise)
			
 
				+
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.result_bbox = []
			
 
				+        self.detection_map.reset()
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        outs = {}
			
 
				+        # outputs Tensor -> numpy.ndarray
			
 
				+        for k, v in outputs.items():
			
 
				+            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
			
 
				+
			
 
				+        im_id = inputs['im_id']
			
 
				+        outs['im_id'] = im_id.numpy() if isinstance(im_id,
			
 
				+                                                    paddle.Tensor) else im_id
			
 
				+
			
 
				+        infer_results = get_infer_results(
			
 
				+            outs, self.clsid2catid, bias=self.bias)
			
 
				+        self.result_bbox += infer_results[
			
 
				+            'bbox'] if 'bbox' in infer_results else []
			
 
				+        bbox = [b['bbox'] for b in self.result_bbox]
			
 
				+        score = [b['score'] for b in self.result_bbox]
			
 
				+        label = [b['category_id'] for b in self.result_bbox]
			
 
				+        label = [self.catid2clsid[e] for e in label]
			
 
				+        gt_box = [
			
 
				+            e['bbox'] for e in self.gt_anno['annotations']
			
 
				+            if e['image_id'] == outs['im_id']
			
 
				+        ]
			
 
				+        gt_label = [
			
 
				+            e['category_id'] for e in self.gt_anno['annotations']
			
 
				+            if e['image_id'] == outs['im_id']
			
 
				+        ]
			
 
				+        gt_label = [self.catid2clsid[e] for e in gt_label]
			
 
				+        self.detection_map.update(bbox, score, label, gt_box, gt_label)
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        if len(self.result_bbox) > 0:
			
 
				+            output = "bbox.json"
			
 
				+            if self.output_eval:
			
 
				+                output = os.path.join(self.output_eval, output)
			
 
				+            with open(output, 'w') as f:
			
 
				+                json.dump(self.result_bbox, f)
			
 
				+                logger.info('The bbox result is saved to bbox.json.')
			
 
				+
			
 
				+            if self.save_prediction_only:
			
 
				+                logger.info('The bbox result is saved to {} and do not '
			
 
				+                            'evaluate the mAP.'.format(output))
			
 
				+            else:
			
 
				+                logger.info("Accumulating evaluatation results...")
			
 
				+                self.detection_map.accumulate()
			
 
				+
			
 
				+    def log(self):
			
 
				+        map_stat = 100. * self.detection_map.get_map()
			
 
				+        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(
			
 
				+            self.overlap_thresh, self.map_type, map_stat))
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return {'bbox': [self.detection_map.get_map()]}
			
 
				+
			
 
				+
			
 
				+class SNIPERCOCOMetric(COCOMetric):
			
 
				+    def __init__(self, anno_file, **kwargs):
			
 
				+        super(SNIPERCOCOMetric, self).__init__(anno_file, **kwargs)
			
 
				+        self.dataset = kwargs["dataset"]
			
 
				+        self.chip_results = []
			
 
				+
			
 
				+    def reset(self):
			
 
				+        # only bbox and mask evaluation support currently
			
 
				+        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
			
 
				+        self.eval_results = {}
			
 
				+        self.chip_results = []
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        outs = {}
			
 
				+        # outputs Tensor -> numpy.ndarray
			
 
				+        for k, v in outputs.items():
			
 
				+            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
			
 
				+
			
 
				+        im_id = inputs['im_id']
			
 
				+        outs['im_id'] = im_id.numpy() if isinstance(im_id,
			
 
				+                                                    paddle.Tensor) else im_id
			
 
				+
			
 
				+        self.chip_results.append(outs)
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        results = self.dataset.anno_cropper.aggregate_chips_detections(
			
 
				+            self.chip_results)
			
 
				+        for outs in results:
			
 
				+            infer_results = get_infer_results(
			
 
				+                outs, self.clsid2catid, bias=self.bias)
			
 
				+            self.results['bbox'] += infer_results[
			
 
				+                'bbox'] if 'bbox' in infer_results else []
			
 
				+
			
 
				+        super(SNIPERCOCOMetric, self).accumulate()
			
--- a/paddlers/models/ppdet/metrics/mot_metrics.py
+++ b/paddlers/models/ppdet/metrics/mot_metrics.py
@@ -0,0 +1,1236 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import copy
			
 
				+import sys
			
 
				+import math
			
 
				+from collections import defaultdict
			
 
				+import numpy as np
			
 
				+import paddle
			
 
				+import paddle.nn.functional as F
			
 
				+from paddlers.models.ppdet.modeling.bbox_utils import bbox_iou_np_expand
			
 
				+from .map_utils import ap_per_class
			
 
				+from .metrics import Metric
			
 
				+from .munkres import Munkres
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = ['MOTEvaluator', 'MOTMetric', 'JDEDetMetric', 'KITTIMOTMetric']
			
 
				+
			
 
				+
			
 
				+def read_mot_results(filename, is_gt=False, is_ignore=False):
			
 
				+    valid_labels = {1}
			
 
				+    ignore_labels = {2, 7, 8, 12}  # only in motchallenge datasets like 'MOT16'
			
 
				+    results_dict = dict()
			
 
				+    if os.path.isfile(filename):
			
 
				+        with open(filename, 'r') as f:
			
 
				+            for line in f.readlines():
			
 
				+                linelist = line.split(',')
			
 
				+                if len(linelist) < 7:
			
 
				+                    continue
			
 
				+                fid = int(linelist[0])
			
 
				+                if fid < 1:
			
 
				+                    continue
			
 
				+                results_dict.setdefault(fid, list())
			
 
				+
			
 
				+                box_size = float(linelist[4]) * float(linelist[5])
			
 
				+
			
 
				+                if is_gt:
			
 
				+                    label = int(float(linelist[7]))
			
 
				+                    mark = int(float(linelist[6]))
			
 
				+                    if mark == 0 or label not in valid_labels:
			
 
				+                        continue
			
 
				+                    score = 1
			
 
				+                elif is_ignore:
			
 
				+                    if 'MOT16-' in filename or 'MOT17-' in filename or 'MOT15-' in filename or 'MOT20-' in filename:
			
 
				+                        label = int(float(linelist[7]))
			
 
				+                        vis_ratio = float(linelist[8])
			
 
				+                        if label not in ignore_labels and vis_ratio >= 0:
			
 
				+                            continue
			
 
				+                    else:
			
 
				+                        continue
			
 
				+                    score = 1
			
 
				+                else:
			
 
				+                    score = float(linelist[6])
			
 
				+
			
 
				+                tlwh = tuple(map(float, linelist[2:6]))
			
 
				+                target_id = int(linelist[1])
			
 
				+
			
 
				+                results_dict[fid].append((tlwh, target_id, score))
			
 
				+    return results_dict
			
 
				+
			
 
				+
			
 
				+"""
			
 
				+MOT dataset label list, see in https://motchallenge.net
			
 
				+labels={'ped', ...			    % 1
			
 
				+        'person_on_vhcl', ...	% 2
			
 
				+        'car', ...				% 3
			
 
				+        'bicycle', ...			% 4
			
 
				+        'mbike', ...			% 5
			
 
				+        'non_mot_vhcl', ...		% 6
			
 
				+        'static_person', ...	% 7
			
 
				+        'distractor', ...		% 8
			
 
				+        'occluder', ...			% 9
			
 
				+        'occluder_on_grnd', ...	% 10
			
 
				+        'occluder_full', ...	% 11
			
 
				+        'reflection', ...		% 12
			
 
				+        'crowd' ...			    % 13
			
 
				+};
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+def unzip_objs(objs):
			
 
				+    if len(objs) > 0:
			
 
				+        tlwhs, ids, scores = zip(*objs)
			
 
				+    else:
			
 
				+        tlwhs, ids, scores = [], [], []
			
 
				+    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
			
 
				+    return tlwhs, ids, scores
			
 
				+
			
 
				+
			
 
				+class MOTEvaluator(object):
			
 
				+    def __init__(self, data_root, seq_name, data_type):
			
 
				+        self.data_root = data_root
			
 
				+        self.seq_name = seq_name
			
 
				+        self.data_type = data_type
			
 
				+
			
 
				+        self.load_annotations()
			
 
				+        self.reset_accumulator()
			
 
				+
			
 
				+    def load_annotations(self):
			
 
				+        assert self.data_type == 'mot'
			
 
				+        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
			
 
				+                                   'gt.txt')
			
 
				+        self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
			
 
				+        self.gt_ignore_frame_dict = read_mot_results(
			
 
				+            gt_filename, is_ignore=True)
			
 
				+
			
 
				+    def reset_accumulator(self):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+        self.acc = mm.MOTAccumulator(auto_id=True)
			
 
				+
			
 
				+    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+        # results
			
 
				+        trk_tlwhs = np.copy(trk_tlwhs)
			
 
				+        trk_ids = np.copy(trk_ids)
			
 
				+
			
 
				+        # gts
			
 
				+        gt_objs = self.gt_frame_dict.get(frame_id, [])
			
 
				+        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
			
 
				+
			
 
				+        # ignore boxes
			
 
				+        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
			
 
				+        ignore_tlwhs = unzip_objs(ignore_objs)[0]
			
 
				+
			
 
				+        # remove ignored results
			
 
				+        keep = np.ones(len(trk_tlwhs), dtype=bool)
			
 
				+        iou_distance = mm.distances.iou_matrix(
			
 
				+            ignore_tlwhs, trk_tlwhs, max_iou=0.5)
			
 
				+        if len(iou_distance) > 0:
			
 
				+            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
			
 
				+            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
			
 
				+            match_ious = iou_distance[match_is, match_js]
			
 
				+
			
 
				+            match_js = np.asarray(match_js, dtype=int)
			
 
				+            match_js = match_js[np.logical_not(np.isnan(match_ious))]
			
 
				+            keep[match_js] = False
			
 
				+            trk_tlwhs = trk_tlwhs[keep]
			
 
				+            trk_ids = trk_ids[keep]
			
 
				+
			
 
				+        # get distance matrix
			
 
				+        iou_distance = mm.distances.iou_matrix(
			
 
				+            gt_tlwhs, trk_tlwhs, max_iou=0.5)
			
 
				+
			
 
				+        # acc
			
 
				+        self.acc.update(gt_ids, trk_ids, iou_distance)
			
 
				+
			
 
				+        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
			
 
				+                                                            'last_mot_events'):
			
 
				+            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
			
 
				+        else:
			
 
				+            events = None
			
 
				+        return events
			
 
				+
			
 
				+    def eval_file(self, filename):
			
 
				+        self.reset_accumulator()
			
 
				+
			
 
				+        result_frame_dict = read_mot_results(filename, is_gt=False)
			
 
				+        frames = sorted(list(set(result_frame_dict.keys())))
			
 
				+        for frame_id in frames:
			
 
				+            trk_objs = result_frame_dict.get(frame_id, [])
			
 
				+            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
			
 
				+            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
			
 
				+
			
 
				+        return self.acc
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def get_summary(accs,
			
 
				+                    names,
			
 
				+                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
			
 
				+                             'precision', 'recall')):
			
 
				+        import motmetrics as mm
			
 
				+        mm.lap.default_solver = 'lap'
			
 
				+        names = copy.deepcopy(names)
			
 
				+        if metrics is None:
			
 
				+            metrics = mm.metrics.motchallenge_metrics
			
 
				+        metrics = copy.deepcopy(metrics)
			
 
				+
			
 
				+        mh = mm.metrics.create()
			
 
				+        summary = mh.compute_many(
			
 
				+            accs, metrics=metrics, names=names, generate_overall=True)
			
 
				+        return summary
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def save_summary(summary, filename):
			
 
				+        import pandas as pd
			
 
				+        writer = pd.ExcelWriter(filename)
			
 
				+        summary.to_excel(writer)
			
 
				+        writer.save()
			
 
				+
			
 
				+
			
 
				+class MOTMetric(Metric):
			
 
				+    def __init__(self, save_summary=False):
			
 
				+        self.save_summary = save_summary
			
 
				+        self.MOTEvaluator = MOTEvaluator
			
 
				+        self.result_root = None
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.accs = []
			
 
				+        self.seqs = []
			
 
				+
			
 
				+    def update(self, data_root, seq, data_type, result_root, result_filename):
			
 
				+        evaluator = self.MOTEvaluator(data_root, seq, data_type)
			
 
				+        self.accs.append(evaluator.eval_file(result_filename))
			
 
				+        self.seqs.append(seq)
			
 
				+        self.result_root = result_root
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        import motmetrics as mm
			
 
				+        import openpyxl
			
 
				+        metrics = mm.metrics.motchallenge_metrics
			
 
				+        mh = mm.metrics.create()
			
 
				+        summary = self.MOTEvaluator.get_summary(self.accs, self.seqs, metrics)
			
 
				+        self.strsummary = mm.io.render_summary(
			
 
				+            summary,
			
 
				+            formatters=mh.formatters,
			
 
				+            namemap=mm.io.motchallenge_metric_names)
			
 
				+        if self.save_summary:
			
 
				+            self.MOTEvaluator.save_summary(
			
 
				+                summary, os.path.join(self.result_root, 'summary.xlsx'))
			
 
				+
			
 
				+    def log(self):
			
 
				+        print(self.strsummary)
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.strsummary
			
 
				+
			
 
				+
			
 
				+class JDEDetMetric(Metric):
			
 
				+    # Note this detection AP metric is different from COCOMetric or VOCMetric,
			
 
				+    # and the bboxes coordinates are not scaled to the original image
			
 
				+    def __init__(self, overlap_thresh=0.5):
			
 
				+        self.overlap_thresh = overlap_thresh
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.AP_accum = np.zeros(1)
			
 
				+        self.AP_accum_count = np.zeros(1)
			
 
				+
			
 
				+    def update(self, inputs, outputs):
			
 
				+        bboxes = outputs['bbox'][:, 2:].numpy()
			
 
				+        scores = outputs['bbox'][:, 1].numpy()
			
 
				+        labels = outputs['bbox'][:, 0].numpy()
			
 
				+        bbox_lengths = outputs['bbox_num'].numpy()
			
 
				+        if bboxes.shape[0] == 1 and bboxes.sum() == 0.0:
			
 
				+            return
			
 
				+
			
 
				+        gt_boxes = inputs['gt_bbox'].numpy()[0]
			
 
				+        gt_labels = inputs['gt_class'].numpy()[0]
			
 
				+        if gt_labels.shape[0] == 0:
			
 
				+            return
			
 
				+
			
 
				+        correct = []
			
 
				+        detected = []
			
 
				+        for i in range(bboxes.shape[0]):
			
 
				+            obj_pred = 0
			
 
				+            pred_bbox = bboxes[i].reshape(1, 4)
			
 
				+            # Compute iou with target boxes
			
 
				+            iou = bbox_iou_np_expand(pred_bbox, gt_boxes, x1y1x2y2=True)[0]
			
 
				+            # Extract index of largest overlap
			
 
				+            best_i = np.argmax(iou)
			
 
				+            # If overlap exceeds threshold and classification is correct mark as correct
			
 
				+            if iou[best_i] > self.overlap_thresh and obj_pred == gt_labels[
			
 
				+                    best_i] and best_i not in detected:
			
 
				+                correct.append(1)
			
 
				+                detected.append(best_i)
			
 
				+            else:
			
 
				+                correct.append(0)
			
 
				+
			
 
				+        # Compute Average Precision (AP) per class
			
 
				+        target_cls = list(gt_labels.T[0])
			
 
				+        AP, AP_class, R, P = ap_per_class(
			
 
				+            tp=correct,
			
 
				+            conf=scores,
			
 
				+            pred_cls=np.zeros_like(scores),
			
 
				+            target_cls=target_cls)
			
 
				+        self.AP_accum_count += np.bincount(AP_class, minlength=1)
			
 
				+        self.AP_accum += np.bincount(AP_class, minlength=1, weights=AP)
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        logger.info("Accumulating evaluatation results...")
			
 
				+        self.map_stat = self.AP_accum[0] / (self.AP_accum_count[0] + 1E-16)
			
 
				+
			
 
				+    def log(self):
			
 
				+        map_stat = 100. * self.map_stat
			
 
				+        logger.info("mAP({:.2f}) = {:.2f}%".format(self.overlap_thresh,
			
 
				+                                                   map_stat))
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.map_stat
			
 
				+
			
 
				+
			
 
				+"""
			
 
				+The following code is borrowed from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/evaluate_tracking.py
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+class tData:
			
 
				+    """
			
 
				+        Utility class to load data.
			
 
				+    """
			
 
				+    def __init__(self,frame=-1,obj_type="unset",truncation=-1,occlusion=-1,\
			
 
				+                 obs_angle=-10,x1=-1,y1=-1,x2=-1,y2=-1,w=-1,h=-1,l=-1,\
			
 
				+                 X=-1000,Y=-1000,Z=-1000,yaw=-10,score=-1000,track_id=-1):
			
 
				+        """
			
 
				+            Constructor, initializes the object given the parameters.
			
 
				+        """
			
 
				+        self.frame = frame
			
 
				+        self.track_id = track_id
			
 
				+        self.obj_type = obj_type
			
 
				+        self.truncation = truncation
			
 
				+        self.occlusion = occlusion
			
 
				+        self.obs_angle = obs_angle
			
 
				+        self.x1 = x1
			
 
				+        self.y1 = y1
			
 
				+        self.x2 = x2
			
 
				+        self.y2 = y2
			
 
				+        self.w = w
			
 
				+        self.h = h
			
 
				+        self.l = l
			
 
				+        self.X = X
			
 
				+        self.Y = Y
			
 
				+        self.Z = Z
			
 
				+        self.yaw = yaw
			
 
				+        self.score = score
			
 
				+        self.ignored = False
			
 
				+        self.valid = False
			
 
				+        self.tracker = -1
			
 
				+
			
 
				+    def __str__(self):
			
 
				+        attrs = vars(self)
			
 
				+        return '\n'.join("%s: %s" % item for item in attrs.items())
			
 
				+
			
 
				+
			
 
				+class KITTIEvaluation(object):
			
 
				+    """ KITTI tracking statistics (CLEAR MOT, id-switches, fragments, ML/PT/MT, precision/recall)
			
 
				+             MOTA	- Multi-object tracking accuracy in [0,100]
			
 
				+             MOTP	- Multi-object tracking precision in [0,100] (3D) / [td,100] (2D)
			
 
				+             MOTAL	- Multi-object tracking accuracy in [0,100] with log10(id-switches)
			
 
				+
			
 
				+             id-switches - number of id switches
			
 
				+             fragments   - number of fragmentations
			
 
				+
			
 
				+             MT, PT, ML	- number of mostly tracked, partially tracked and mostly lost trajectories
			
 
				+
			
 
				+             recall	        - recall = percentage of detected targets
			
 
				+             precision	    - precision = percentage of correctly detected targets
			
 
				+             FAR		    - number of false alarms per frame
			
 
				+             falsepositives - number of false positives (FP)
			
 
				+             missed         - number of missed targets (FN)
			
 
				+    """
			
 
				+    def __init__(self, result_path, gt_path, min_overlap=0.5, max_truncation = 0,\
			
 
				+                min_height = 25, max_occlusion = 2, cls="car",\
			
 
				+                n_frames=[], seqs=[], n_sequences=0):
			
 
				+        # get number of sequences and
			
 
				+        # get number of frames per sequence from test mapping
			
 
				+        # (created while extracting the benchmark)
			
 
				+        self.gt_path = os.path.join(gt_path, "../labels")
			
 
				+        self.n_frames = n_frames
			
 
				+        self.sequence_name = seqs
			
 
				+        self.n_sequences = n_sequences
			
 
				+
			
 
				+        self.cls = cls  # class to evaluate, i.e. pedestrian or car
			
 
				+
			
 
				+        self.result_path = result_path
			
 
				+
			
 
				+        # statistics and numbers for evaluation
			
 
				+        self.n_gt = 0  # number of ground truth detections minus ignored false negatives and true positives
			
 
				+        self.n_igt = 0  # number of ignored ground truth detections
			
 
				+        self.n_gts = [
			
 
				+        ]  # number of ground truth detections minus ignored false negatives and true positives PER SEQUENCE
			
 
				+        self.n_igts = [
			
 
				+        ]  # number of ground ignored truth detections PER SEQUENCE
			
 
				+        self.n_gt_trajectories = 0
			
 
				+        self.n_gt_seq = []
			
 
				+        self.n_tr = 0  # number of tracker detections minus ignored tracker detections
			
 
				+        self.n_trs = [
			
 
				+        ]  # number of tracker detections minus ignored tracker detections PER SEQUENCE
			
 
				+        self.n_itr = 0  # number of ignored tracker detections
			
 
				+        self.n_itrs = []  # number of ignored tracker detections PER SEQUENCE
			
 
				+        self.n_igttr = 0  # number of ignored ground truth detections where the corresponding associated tracker detection is also ignored
			
 
				+        self.n_tr_trajectories = 0
			
 
				+        self.n_tr_seq = []
			
 
				+        self.MOTA = 0
			
 
				+        self.MOTP = 0
			
 
				+        self.MOTAL = 0
			
 
				+        self.MODA = 0
			
 
				+        self.MODP = 0
			
 
				+        self.MODP_t = []
			
 
				+        self.recall = 0
			
 
				+        self.precision = 0
			
 
				+        self.F1 = 0
			
 
				+        self.FAR = 0
			
 
				+        self.total_cost = 0
			
 
				+        self.itp = 0  # number of ignored true positives
			
 
				+        self.itps = []  # number of ignored true positives PER SEQUENCE
			
 
				+        self.tp = 0  # number of true positives including ignored true positives!
			
 
				+        self.tps = [
			
 
				+        ]  # number of true positives including ignored true positives PER SEQUENCE
			
 
				+        self.fn = 0  # number of false negatives WITHOUT ignored false negatives
			
 
				+        self.fns = [
			
 
				+        ]  # number of false negatives WITHOUT ignored false negatives PER SEQUENCE
			
 
				+        self.ifn = 0  # number of ignored false negatives
			
 
				+        self.ifns = []  # number of ignored false negatives PER SEQUENCE
			
 
				+        self.fp = 0  # number of false positives
			
 
				+        # a bit tricky, the number of ignored false negatives and ignored true positives
			
 
				+        # is subtracted, but if both tracker detection and ground truth detection
			
 
				+        # are ignored this number is added again to avoid double counting
			
 
				+        self.fps = []  # above PER SEQUENCE
			
 
				+        self.mme = 0
			
 
				+        self.fragments = 0
			
 
				+        self.id_switches = 0
			
 
				+        self.MT = 0
			
 
				+        self.PT = 0
			
 
				+        self.ML = 0
			
 
				+
			
 
				+        self.min_overlap = min_overlap  # minimum bounding box overlap for 3rd party metrics
			
 
				+        self.max_truncation = max_truncation  # maximum truncation of an object for evaluation
			
 
				+        self.max_occlusion = max_occlusion  # maximum occlusion of an object for evaluation
			
 
				+        self.min_height = min_height  # minimum height of an object for evaluation
			
 
				+        self.n_sample_points = 500
			
 
				+
			
 
				+        # this should be enough to hold all groundtruth trajectories
			
 
				+        # is expanded if necessary and reduced in any case
			
 
				+        self.gt_trajectories = [[] for x in range(self.n_sequences)]
			
 
				+        self.ign_trajectories = [[] for x in range(self.n_sequences)]
			
 
				+
			
 
				+    def loadGroundtruth(self):
			
 
				+        try:
			
 
				+            self._loadData(
			
 
				+                self.gt_path, cls=self.cls, loading_groundtruth=True)
			
 
				+        except IOError:
			
 
				+            return False
			
 
				+        return True
			
 
				+
			
 
				+    def loadTracker(self):
			
 
				+        try:
			
 
				+            if not self._loadData(
			
 
				+                    self.result_path, cls=self.cls, loading_groundtruth=False):
			
 
				+                return False
			
 
				+        except IOError:
			
 
				+            return False
			
 
				+        return True
			
 
				+
			
 
    def _loadData(self,
                  root_dir,
                  cls,
                  min_score=-1000,
                  loading_groundtruth=False):
        """
            Generic loader for ground truth and tracking data.
            Use loadGroundtruth() or loadTracker() to load this data.
            Loads detections in KITTI format from textfiles.

            One file ``<root_dir>/<sequence name>.txt`` is read per entry of
            ``self.sequence_name``.

            Args:
                root_dir: Directory containing the per-sequence text files.
                cls: Class to load; "car" also loads "van", "pedestrian"
                    also loads "person_sitting", and "dontcare" entries are
                    always loaded.
                min_score: Unused; kept for interface compatibility.
                loading_groundtruth: True stores the result as ground truth
                    (``self.groundtruth`` / ``self.dcareas``), False stores
                    it as tracker output (``self.tracker``).

            Returns:
                True on success. In tracker mode, False when a line has
                neither 17 nor 18 fields, when a (frame, track id) pair is
                repeated within a sequence, or when no trajectory was
                loaded.
        """
        # classes that should be loaded (ignored neighboring classes);
        # this only depends on ``cls``, so it is computed once up front
        cls_lower = cls.lower()
        if "car" in cls_lower:
            classes = ["car", "van"]
        elif "pedestrian" in cls_lower:
            classes = ["pedestrian", "person_sitting"]
        else:
            classes = [cls_lower]
        classes.append("dontcare")

        # a single record is filled in per line and copied into the frame
        # list, so attributes not set below keep their constructor defaults
        t_data = tData()
        eval_2d = True
        eval_3d = True

        seq_data = []
        n_trajectories = 0
        n_trajectories_seq = []
        for seq, s_name in enumerate(self.sequence_name):
            filename = os.path.join(root_dir, "%s.txt" % s_name)
            # the context manager closes the file on the early returns and
            # on exceptions as well as on normal completion
            with open(filename, "r") as f:
                # one list per frame; sufficient length is checked anyway
                f_data = [[] for _ in range(self.n_frames[seq])]
                # sets give O(1) membership tests for the uniqueness checks
                ids = set()
                n_in_seq = 0
                id_frame_cache = set()
                for line in f:
                    # KITTI tracking benchmark data format:
                    # (frame,tracklet_id,objectType,truncation,occlusion,alpha,x1,y1,x2,y2,h,w,l,X,Y,Z,ry)
                    line = line.strip()
                    if not line:
                        # tolerate blank lines (e.g. a trailing newline)
                        continue
                    fields = line.split(" ")
                    obj_type = fields[2].lower()
                    # substring match against the classes to load
                    if not any(name in obj_type for name in classes):
                        continue
                    # get fields from table
                    t_data.frame = int(float(fields[0]))  # frame
                    t_data.track_id = int(float(fields[1]))  # id
                    t_data.obj_type = obj_type  # object type [car, pedestrian, cyclist, ...]
                    t_data.truncation = int(float(fields[3]))  # truncation [-1,0,1,2]
                    t_data.occlusion = int(float(fields[4]))  # occlusion  [-1,0,1,2]
                    t_data.obs_angle = float(fields[5])  # observation angle [rad]
                    t_data.x1 = float(fields[6])  # left   [px]
                    t_data.y1 = float(fields[7])  # top    [px]
                    t_data.x2 = float(fields[8])  # right  [px]
                    t_data.y2 = float(fields[9])  # bottom [px]
                    t_data.h = float(fields[10])  # height [m]
                    t_data.w = float(fields[11])  # width  [m]
                    t_data.l = float(fields[12])  # length [m]
                    t_data.X = float(fields[13])  # X [m]
                    t_data.Y = float(fields[14])  # Y [m]
                    t_data.Z = float(fields[15])  # Z [m]
                    t_data.yaw = float(fields[16])  # yaw angle [rad]
                    if not loading_groundtruth:
                        if len(fields) == 17:
                            t_data.score = -1
                        elif len(fields) == 18:
                            t_data.score = float(fields[17])  # detection score
                        else:
                            logger.info("file is not in KITTI format")
                            # explicit False (previously a bare return);
                            # still falsy for loadTracker()
                            return False

                    # do not consider objects marked as invalid
                    # (value comparison: ``is -1`` tested object identity)
                    if t_data.track_id == -1 and t_data.obj_type != "dontcare":
                        continue

                    idx = t_data.frame
                    # check if length for frame data is sufficient
                    if idx >= len(f_data):
                        print("extend f_data", idx, len(f_data))
                        f_data += [[] for _ in range(max(500, idx - len(f_data)))]

                    id_frame = (t_data.frame, t_data.track_id)
                    if id_frame in id_frame_cache and not loading_groundtruth:
                        logger.info(
                            "track ids are not unique for sequence %d: frame %d"
                            % (seq, t_data.frame))
                        logger.info(
                            "track id %d occured at least twice for this frame"
                            % t_data.track_id)
                        logger.info("Exiting...")
                        #continue # this allows to evaluate non-unique result files
                        return False
                    id_frame_cache.add(id_frame)
                    try:
                        f_data[t_data.frame].append(copy.copy(t_data))
                    except Exception:
                        # print the sizes involved, then let the error out
                        print(len(f_data), idx)
                        raise

                    if t_data.track_id not in ids and t_data.obj_type != "dontcare":
                        ids.add(t_data.track_id)
                        n_trajectories += 1
                        n_in_seq += 1

                    # check if uploaded data provides information for 2D and 3D evaluation
                    if not loading_groundtruth:
                        if eval_2d and (t_data.x1 == -1 or t_data.x2 == -1 or
                                        t_data.y1 == -1 or t_data.y2 == -1):
                            eval_2d = False
                        if eval_3d and (t_data.X == -1000 or
                                        t_data.Y == -1000 or
                                        t_data.Z == -1000):
                            eval_3d = False

            # only add existing frames
            n_trajectories_seq.append(n_in_seq)
            seq_data.append(f_data)

        if not loading_groundtruth:
            self.tracker = seq_data
            self.n_tr_trajectories = n_trajectories
            self.eval_2d = eval_2d
            self.eval_3d = eval_3d
            self.n_tr_seq = n_trajectories_seq
            if self.n_tr_trajectories == 0:
                return False
        else:
            # split ground truth and DontCare areas
            self.dcareas = []
            self.groundtruth = []
            for seq_gt in seq_data:
                s_g, s_dc = [], []
                for all_gt in seq_gt:
                    s_g.append(
                        [gg for gg in all_gt if gg.obj_type != "dontcare"])
                    s_dc.append(
                        [gg for gg in all_gt if gg.obj_type == "dontcare"])
                self.dcareas.append(s_dc)
                self.groundtruth.append(s_g)
            self.n_gt_seq = n_trajectories_seq
            self.n_gt_trajectories = n_trajectories
        return True
			
 
				+
			
 
    def boxoverlap(self, a, b, criterion="union"):
        """
            Compute the overlap of two boxes given by x1/y1/x2/y2 attributes.

            With criterion 'union' the result is (a inter b) / (a union b).
            With criterion 'a' the result is (a inter b) / a, where b should
            be a dontcare area.
            The criterion is compared case-insensitively and is only checked
            when the boxes actually intersect; otherwise 0. is returned.

            Raises:
                TypeError: intersecting boxes with an unknown criterion.
        """
        inter_w = min(a.x2, b.x2) - max(a.x1, b.x1)
        inter_h = min(a.y2, b.y2) - max(a.y1, b.y1)

        # boxes that do not intersect (or only touch) have zero overlap
        if inter_w <= 0. or inter_h <= 0.:
            return 0.

        intersection = inter_w * inter_h
        area_a = (a.x2 - a.x1) * (a.y2 - a.y1)
        area_b = (b.x2 - b.x1) * (b.y2 - b.y1)

        mode = criterion.lower()
        if mode == "union":
            # intersection over union overlap
            return intersection / float(area_a + area_b - intersection)
        if mode == "a":
            # intersection relative to the area of box a only
            return float(intersection) / float(area_a)
        raise TypeError("Unkown type for criterion")
			
 
				+
			
 
				+    def compute3rdPartyMetrics(self):
			
 
				+        """
			
 
				+            Computes the metrics defined in
			
 
				+                - Stiefelhagen 2008: Evaluating Multiple Object Tracking Performance: The CLEAR MOT Metrics
			
 
				+                  MOTA, MOTAL, MOTP
			
 
				+                - Nevatia 2008: Global Data Association for Multi-Object Tracking Using Network Flows
			
 
				+                  MT/PT/ML
			
 
				+        """
			
 
				+        # construct Munkres object for Hungarian Method association
			
 
				+        hm = Munkres()
			
 
				+        max_cost = 1e9
			
 
				+
			
 
				+        # go through all frames and associate ground truth and tracker results
			
 
				+        # groundtruth and tracker contain lists for every single frame containing lists of KITTI format detections
			
 
				+        fr, ids = 0, 0
			
 
				+        for seq_idx in range(len(self.groundtruth)):
			
 
				+            seq_gt = self.groundtruth[seq_idx]
			
 
				+            seq_dc = self.dcareas[seq_idx]  # don't care areas
			
 
				+            seq_tracker = self.tracker[seq_idx]
			
 
				+            seq_trajectories = defaultdict(list)
			
 
				+            seq_ignored = defaultdict(list)
			
 
				+
			
 
				+            # statistics over the current sequence, check the corresponding
			
 
				+            # variable comments in __init__ to get their meaning
			
 
				+            seqtp = 0
			
 
				+            seqitp = 0
			
 
				+            seqfn = 0
			
 
				+            seqifn = 0
			
 
				+            seqfp = 0
			
 
				+            seqigt = 0
			
 
				+            seqitr = 0
			
 
				+
			
 
				+            last_ids = [[], []]
			
 
				+            n_gts = 0
			
 
				+            n_trs = 0
			
 
				+
			
 
				+            for f in range(len(seq_gt)):
			
 
				+                g = seq_gt[f]
			
 
				+                dc = seq_dc[f]
			
 
				+
			
 
				+                t = seq_tracker[f]
			
 
				+                # counting total number of ground truth and tracker objects
			
 
				+                self.n_gt += len(g)
			
 
				+                self.n_tr += len(t)
			
 
				+
			
 
				+                n_gts += len(g)
			
 
				+                n_trs += len(t)
			
 
				+
			
 
				+                # use hungarian method to associate, using boxoverlap 0..1 as cost
			
 
				+                # build cost matrix
			
 
				+                cost_matrix = []
			
 
				+                this_ids = [[], []]
			
 
				+                for gg in g:
			
 
				+                    # save current ids
			
 
				+                    this_ids[0].append(gg.track_id)
			
 
				+                    this_ids[1].append(-1)
			
 
				+                    gg.tracker = -1
			
 
				+                    gg.id_switch = 0
			
 
				+                    gg.fragmentation = 0
			
 
				+                    cost_row = []
			
 
				+                    for tt in t:
			
 
				+                        # overlap == 1 is cost ==0
			
 
				+                        c = 1 - self.boxoverlap(gg, tt)
			
 
				+                        # gating for boxoverlap
			
 
				+                        if c <= self.min_overlap:
			
 
				+                            cost_row.append(c)
			
 
				+                        else:
			
 
				+                            cost_row.append(max_cost)  # = 1e9
			
 
				+                    cost_matrix.append(cost_row)
			
 
				+                    # all ground truth trajectories are initially not associated
			
 
				+                    # extend groundtruth trajectories lists (merge lists)
			
 
				+                    seq_trajectories[gg.track_id].append(-1)
			
 
				+                    seq_ignored[gg.track_id].append(False)
			
 
				+
			
 
				+                if len(g) is 0:
			
 
				+                    cost_matrix = [[]]
			
 
				+                # associate
			
 
				+                association_matrix = hm.compute(cost_matrix)
			
 
				+
			
 
				+                # tmp variables for sanity checks and MODP computation
			
 
				+                tmptp = 0
			
 
				+                tmpfp = 0
			
 
				+                tmpfn = 0
			
 
				+                tmpc = 0  # this will sum up the overlaps for all true positives
			
 
				+                tmpcs = [0] * len(
			
 
				+                    g)  # this will save the overlaps for all true positives
			
 
				+                # the reason is that some true positives might be ignored
			
 
				+                # later such that the corrsponding overlaps can
			
 
				+                # be subtracted from tmpc for MODP computation
			
 
				+
			
 
				+                # mapping for tracker ids and ground truth ids
			
 
				+                for row, col in association_matrix:
			
 
				+                    # apply gating on boxoverlap
			
 
				+                    c = cost_matrix[row][col]
			
 
				+                    if c < max_cost:
			
 
				+                        g[row].tracker = t[col].track_id
			
 
				+                        this_ids[1][row] = t[col].track_id
			
 
				+                        t[col].valid = True
			
 
				+                        g[row].distance = c
			
 
				+                        self.total_cost += 1 - c
			
 
				+                        tmpc += 1 - c
			
 
				+                        tmpcs[row] = 1 - c
			
 
				+                        seq_trajectories[g[row].track_id][-1] = t[col].track_id
			
 
				+
			
 
				+                        # true positives are only valid associations
			
 
				+                        self.tp += 1
			
 
				+                        tmptp += 1
			
 
				+                    else:
			
 
				+                        g[row].tracker = -1
			
 
				+                        self.fn += 1
			
 
				+                        tmpfn += 1
			
 
				+
			
 
				+                # associate tracker and DontCare areas
			
 
				+                # ignore tracker in neighboring classes
			
 
				+                nignoredtracker = 0  # number of ignored tracker detections
			
 
				+                ignoredtrackers = dict()  # will associate the track_id with -1
			
 
				+                # if it is not ignored and 1 if it is
			
 
				+                # ignored;
			
 
				+                # this is used to avoid double counting ignored
			
 
				+                # cases, see the next loop
			
 
				+
			
 
				+                for tt in t:
			
 
				+                    ignoredtrackers[tt.track_id] = -1
			
 
				+                    # ignore detection if it belongs to a neighboring class or is
			
 
				+                    # smaller or equal to the minimum height
			
 
				+
			
 
				+                    tt_height = abs(tt.y1 - tt.y2)
			
 
				+                    if ((self.cls == "car" and tt.obj_type == "van") or
			
 
				+                        (self.cls == "pedestrian" and
			
 
				+                         tt.obj_type == "person_sitting") or
			
 
				+                            tt_height <= self.min_height) and not tt.valid:
			
 
				+                        nignoredtracker += 1
			
 
				+                        tt.ignored = True
			
 
				+                        ignoredtrackers[tt.track_id] = 1
			
 
				+                        continue
			
 
				+                    for d in dc:
			
 
				+                        overlap = self.boxoverlap(tt, d, "a")
			
 
				+                        if overlap > 0.5 and not tt.valid:
			
 
				+                            tt.ignored = True
			
 
				+                            nignoredtracker += 1
			
 
				+                            ignoredtrackers[tt.track_id] = 1
			
 
				+                            break
			
 
				+
			
 
				+                # check for ignored FN/TP (truncation or neighboring object class)
			
 
				+                ignoredfn = 0  # the number of ignored false negatives
			
 
				+                nignoredtp = 0  # the number of ignored true positives
			
 
				+                nignoredpairs = 0  # the number of ignored pairs, i.e. a true positive
			
 
				+                # which is ignored but where the associated tracker
			
 
				+                # detection has already been ignored
			
 
				+
			
 
				+                gi = 0
			
 
				+                for gg in g:
			
 
				+                    if gg.tracker < 0:
			
 
				+                        if gg.occlusion>self.max_occlusion or gg.truncation>self.max_truncation\
			
 
				+                                or (self.cls=="car" and gg.obj_type=="van") or (self.cls=="pedestrian" and gg.obj_type=="person_sitting"):
			
 
				+                            seq_ignored[gg.track_id][-1] = True
			
 
				+                            gg.ignored = True
			
 
				+                            ignoredfn += 1
			
 
				+
			
 
				+                    elif gg.tracker >= 0:
			
 
				+                        if gg.occlusion>self.max_occlusion or gg.truncation>self.max_truncation\
			
 
				+                                or (self.cls=="car" and gg.obj_type=="van") or (self.cls=="pedestrian" and gg.obj_type=="person_sitting"):
			
 
				+
			
 
				+                            seq_ignored[gg.track_id][-1] = True
			
 
				+                            gg.ignored = True
			
 
				+                            nignoredtp += 1
			
 
				+
			
 
				+                            # if the associated tracker detection is already ignored,
			
 
				+                            # we want to avoid double counting ignored detections
			
 
				+                            if ignoredtrackers[gg.tracker] > 0:
			
 
				+                                nignoredpairs += 1
			
 
				+
			
 
				+                            # for computing MODP, the overlaps from ignored detections
			
 
				+                            # are subtracted
			
 
				+                            tmpc -= tmpcs[gi]
			
 
				+                    gi += 1
			
 
				+
			
 
				+                # the below might be confusion, check the comments in __init__
			
 
				+                # to see what the individual statistics represent
			
 
				+
			
 
				+                # correct TP by number of ignored TP due to truncation
			
 
				+                # ignored TP are shown as tracked in visualization
			
 
				+                tmptp -= nignoredtp
			
 
				+
			
 
				+                # count the number of ignored true positives
			
 
				+                self.itp += nignoredtp
			
 
				+
			
 
				+                # adjust the number of ground truth objects considered
			
 
				+                self.n_gt -= (ignoredfn + nignoredtp)
			
 
				+
			
 
				+                # count the number of ignored ground truth objects
			
 
				+                self.n_igt += ignoredfn + nignoredtp
			
 
				+
			
 
				+                # count the number of ignored tracker objects
			
 
				+                self.n_itr += nignoredtracker
			
 
				+
			
 
				+                # count the number of ignored pairs, i.e. associated tracker and
			
 
				+                # ground truth objects that are both ignored
			
 
				+                self.n_igttr += nignoredpairs
			
 
				+
			
 
				+                # false negatives = associated gt bboxes exceeding association threshold + non-associated gt bboxes
			
 
				+                tmpfn += len(g) - len(association_matrix) - ignoredfn
			
 
				+                self.fn += len(g) - len(association_matrix) - ignoredfn
			
 
				+                self.ifn += ignoredfn
			
 
				+
			
 
				+                # false positives = tracker bboxes - associated tracker bboxes
			
 
				+                # mismatches (mme_t)
			
 
				+                tmpfp += len(
			
 
				+                    t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
			
 
				+                self.fp += len(
			
 
				+                    t) - tmptp - nignoredtracker - nignoredtp + nignoredpairs
			
 
				+
			
 
				+                # update sequence data
			
 
				+                seqtp += tmptp
			
 
				+                seqitp += nignoredtp
			
 
				+                seqfp += tmpfp
			
 
				+                seqfn += tmpfn
			
 
				+                seqifn += ignoredfn
			
 
				+                seqigt += ignoredfn + nignoredtp
			
 
				+                seqitr += nignoredtracker
			
 
				+
			
 
				+                # sanity checks
			
 
				+                # - the number of true positives minus ignored true positives
			
 
				+                #   should be greater or equal to 0
			
 
				+                # - the number of false negatives should be greater or equal to 0
			
 
				+                # - the number of false positives needs to be greater or equal to 0
			
 
				+                #   otherwise ignored detections might be counted double
			
 
				+                # - the number of counted true positives (plus ignored ones)
			
 
				+                #   and the number of counted false negatives (plus ignored ones)
			
 
				+                #   should match the total number of ground truth objects
			
 
				+                # - the number of counted true positives (plus ignored ones)
			
 
				+                #   and the number of counted false positives
			
 
				+                #   plus the number of ignored tracker detections should
			
 
				+                #   match the total number of tracker detections; note that
			
 
				+                #   nignoredpairs is subtracted here to avoid double counting
			
 
				+                #   of ignored detections in nignoredtp and nignoredtracker
			
 
				+                if tmptp < 0:
			
 
				+                    print(tmptp, nignoredtp)
			
 
				+                    raise NameError("Something went wrong! TP is negative")
			
 
				+                if tmpfn < 0:
			
 
				+                    print(tmpfn,
			
 
				+                          len(g),
			
 
				+                          len(association_matrix), ignoredfn, nignoredpairs)
			
 
				+                    raise NameError("Something went wrong! FN is negative")
			
 
				+                if tmpfp < 0:
			
 
				+                    print(tmpfp,
			
 
				+                          len(t), tmptp, nignoredtracker, nignoredtp,
			
 
				+                          nignoredpairs)
			
 
				+                    raise NameError("Something went wrong! FP is negative")
			
 
				+                if tmptp + tmpfn is not len(g) - ignoredfn - nignoredtp:
			
 
				+                    print("seqidx", seq_idx)
			
 
				+                    print("frame ", f)
			
 
				+                    print("TP    ", tmptp)
			
 
				+                    print("FN    ", tmpfn)
			
 
				+                    print("FP    ", tmpfp)
			
 
				+                    print("nGT   ", len(g))
			
 
				+                    print("nAss  ", len(association_matrix))
			
 
				+                    print("ign GT", ignoredfn)
			
 
				+                    print("ign TP", nignoredtp)
			
 
				+                    raise NameError(
			
 
				+                        "Something went wrong! nGroundtruth is not TP+FN")
			
 
				+                if tmptp + tmpfp + nignoredtp + nignoredtracker - nignoredpairs is not len(
			
 
				+                        t):
			
 
				+                    print(seq_idx, f, len(t), tmptp, tmpfp)
			
 
				+                    print(len(association_matrix), association_matrix)
			
 
				+                    raise NameError(
			
 
				+                        "Something went wrong! nTracker is not TP+FP")
			
 
				+
			
 
				+                # check for id switches or fragmentations
			
 
				+                for i, tt in enumerate(this_ids[0]):
			
 
				+                    if tt in last_ids[0]:
			
 
				+                        idx = last_ids[0].index(tt)
			
 
				+                        tid = this_ids[1][i]
			
 
				+                        lid = last_ids[1][idx]
			
 
				+                        if tid != lid and lid != -1 and tid != -1:
			
 
				+                            if g[i].truncation < self.max_truncation:
			
 
				+                                g[i].id_switch = 1
			
 
				+                                ids += 1
			
 
				+                        if tid != lid and lid != -1:
			
 
				+                            if g[i].truncation < self.max_truncation:
			
 
				+                                g[i].fragmentation = 1
			
 
				+                                fr += 1
			
 
				+
			
 
				+                # save current index
			
 
				+                last_ids = this_ids
			
 
				+                # compute MODP_t
			
 
				+                MODP_t = 1
			
 
				+                if tmptp != 0:
			
 
				+                    MODP_t = tmpc / float(tmptp)
			
 
				+                self.MODP_t.append(MODP_t)
			
 
				+
			
 
				+            # remove empty lists for current gt trajectories
			
 
				+            self.gt_trajectories[seq_idx] = seq_trajectories
			
 
				+            self.ign_trajectories[seq_idx] = seq_ignored
			
 
				+
			
 
				+            # gather statistics for "per sequence" statistics.
			
 
				+            self.n_gts.append(n_gts)
			
 
				+            self.n_trs.append(n_trs)
			
 
				+            self.tps.append(seqtp)
			
 
				+            self.itps.append(seqitp)
			
 
				+            self.fps.append(seqfp)
			
 
				+            self.fns.append(seqfn)
			
 
				+            self.ifns.append(seqifn)
			
 
				+            self.n_igts.append(seqigt)
			
 
				+            self.n_itrs.append(seqitr)
			
 
				+
			
 
				+        # compute MT/PT/ML, fragments, idswitches for all groundtruth trajectories
			
 
				+        n_ignored_tr_total = 0
			
 
				+        for seq_idx, (
			
 
				+                seq_trajectories, seq_ignored
			
 
				+        ) in enumerate(zip(self.gt_trajectories, self.ign_trajectories)):
			
 
				+            if len(seq_trajectories) == 0:
			
 
				+                continue
			
 
				+            tmpMT, tmpML, tmpPT, tmpId_switches, tmpFragments = [0] * 5
			
 
				+            n_ignored_tr = 0
			
 
				+            for g, ign_g in zip(seq_trajectories.values(),
			
 
				+                                seq_ignored.values()):
			
 
				+                # all frames of this gt trajectory are ignored
			
 
				+                if all(ign_g):
			
 
				+                    n_ignored_tr += 1
			
 
				+                    n_ignored_tr_total += 1
			
 
				+                    continue
			
 
				+                # all frames of this gt trajectory are not assigned to any detections
			
 
				+                if all([this == -1 for this in g]):
			
 
				+                    tmpML += 1
			
 
				+                    self.ML += 1
			
 
				+                    continue
			
 
				+                # compute tracked frames in trajectory
			
 
				+                last_id = g[0]
			
 
				+                # first detection (necessary to be in gt_trajectories) is always tracked
			
 
				+                tracked = 1 if g[0] >= 0 else 0
			
 
				+                lgt = 0 if ign_g[0] else 1
			
 
				+                for f in range(1, len(g)):
			
 
				+                    if ign_g[f]:
			
 
				+                        last_id = -1
			
 
				+                        continue
			
 
				+                    lgt += 1
			
 
				+                    if last_id != g[f] and last_id != -1 and g[f] != -1 and g[
			
 
				+                            f - 1] != -1:
			
 
				+                        tmpId_switches += 1
			
 
				+                        self.id_switches += 1
			
 
				+                    if f < len(g) - 1 and g[f - 1] != g[
			
 
				+                            f] and last_id != -1 and g[f] != -1 and g[f +
			
 
				+                                                                      1] != -1:
			
 
				+                        tmpFragments += 1
			
 
				+                        self.fragments += 1
			
 
				+                    if g[f] != -1:
			
 
				+                        tracked += 1
			
 
				+                        last_id = g[f]
			
 
				+                # handle last frame; tracked state is handled in for loop (g[f]!=-1)
			
 
				+                if len(g) > 1 and g[f - 1] != g[f] and last_id != -1 and g[
			
 
				+                        f] != -1 and not ign_g[f]:
			
 
				+                    tmpFragments += 1
			
 
				+                    self.fragments += 1
			
 
				+
			
 
				+                # compute MT/PT/ML
			
 
				+                tracking_ratio = tracked / float(len(g) - sum(ign_g))
			
 
				+                if tracking_ratio > 0.8:
			
 
				+                    tmpMT += 1
			
 
				+                    self.MT += 1
			
 
				+                elif tracking_ratio < 0.2:
			
 
				+                    tmpML += 1
			
 
				+                    self.ML += 1
			
 
				+                else:  # 0.2 <= tracking_ratio <= 0.8
			
 
				+                    tmpPT += 1
			
 
				+                    self.PT += 1
			
 
				+
			
 
				+        if (self.n_gt_trajectories - n_ignored_tr_total) == 0:
			
 
				+            self.MT = 0.
			
 
				+            self.PT = 0.
			
 
				+            self.ML = 0.
			
 
				+        else:
			
 
				+            self.MT /= float(self.n_gt_trajectories - n_ignored_tr_total)
			
 
				+            self.PT /= float(self.n_gt_trajectories - n_ignored_tr_total)
			
 
				+            self.ML /= float(self.n_gt_trajectories - n_ignored_tr_total)
			
 
				+
			
 
				+        # precision/recall etc.
			
 
				+        if (self.fp + self.tp) == 0 or (self.tp + self.fn) == 0:
			
 
				+            self.recall = 0.
			
 
				+            self.precision = 0.
			
 
				+        else:
			
 
				+            self.recall = self.tp / float(self.tp + self.fn)
			
 
				+            self.precision = self.tp / float(self.fp + self.tp)
			
 
				+        if (self.recall + self.precision) == 0:
			
 
				+            self.F1 = 0.
			
 
				+        else:
			
 
				+            self.F1 = 2. * (self.precision * self.recall) / (
			
 
				+                self.precision + self.recall)
			
 
				+        if sum(self.n_frames) == 0:
			
 
				+            self.FAR = "n/a"
			
 
				+        else:
			
 
				+            self.FAR = self.fp / float(sum(self.n_frames))
			
 
				+
			
 
				+        # compute CLEARMOT
			
 
				+        if self.n_gt == 0:
			
 
				+            self.MOTA = -float("inf")
			
 
				+            self.MODA = -float("inf")
			
 
				+        else:
			
 
				+            self.MOTA = 1 - (self.fn + self.fp + self.id_switches
			
 
				+                             ) / float(self.n_gt)
			
 
				+            self.MODA = 1 - (self.fn + self.fp) / float(self.n_gt)
			
 
				+        if self.tp == 0:
			
 
				+            self.MOTP = float("inf")
			
 
				+        else:
			
 
				+            self.MOTP = self.total_cost / float(self.tp)
			
 
				+        if self.n_gt != 0:
			
 
				+            if self.id_switches == 0:
			
 
				+                self.MOTAL = 1 - (self.fn + self.fp + self.id_switches
			
 
				+                                  ) / float(self.n_gt)
			
 
				+            else:
			
 
				+                self.MOTAL = 1 - (self.fn + self.fp +
			
 
				+                                  math.log10(self.id_switches)
			
 
				+                                  ) / float(self.n_gt)
			
 
				+        else:
			
 
				+            self.MOTAL = -float("inf")
			
 
				+        if sum(self.n_frames) == 0:
			
 
				+            self.MODP = "n/a"
			
 
				+        else:
			
 
				+            self.MODP = sum(self.MODP_t) / float(sum(self.n_frames))
			
 
				+        return True
			
 
				+
			
 
				+    def createSummary(self):
			
 
				+        summary = ""
			
 
				+        summary += "tracking evaluation summary".center(80, "=") + "\n"
			
 
				+        summary += self.printEntry("Multiple Object Tracking Accuracy (MOTA)",
			
 
				+                                   self.MOTA) + "\n"
			
 
				+        summary += self.printEntry("Multiple Object Tracking Precision (MOTP)",
			
 
				+                                   self.MOTP) + "\n"
			
 
				+        summary += self.printEntry("Multiple Object Tracking Accuracy (MOTAL)",
			
 
				+                                   self.MOTAL) + "\n"
			
 
				+        summary += self.printEntry("Multiple Object Detection Accuracy (MODA)",
			
 
				+                                   self.MODA) + "\n"
			
 
				+        summary += self.printEntry(
			
 
				+            "Multiple Object Detection Precision (MODP)", self.MODP) + "\n"
			
 
				+        summary += "\n"
			
 
				+        summary += self.printEntry("Recall", self.recall) + "\n"
			
 
				+        summary += self.printEntry("Precision", self.precision) + "\n"
			
 
				+        summary += self.printEntry("F1", self.F1) + "\n"
			
 
				+        summary += self.printEntry("False Alarm Rate", self.FAR) + "\n"
			
 
				+        summary += "\n"
			
 
				+        summary += self.printEntry("Mostly Tracked", self.MT) + "\n"
			
 
				+        summary += self.printEntry("Partly Tracked", self.PT) + "\n"
			
 
				+        summary += self.printEntry("Mostly Lost", self.ML) + "\n"
			
 
				+        summary += "\n"
			
 
				+        summary += self.printEntry("True Positives", self.tp) + "\n"
			
 
				+        #summary += self.printEntry("True Positives per Sequence", self.tps) + "\n"
			
 
				+        summary += self.printEntry("Ignored True Positives", self.itp) + "\n"
			
 
				+        #summary += self.printEntry("Ignored True Positives per Sequence", self.itps) + "\n"
			
 
				+
			
 
				+        summary += self.printEntry("False Positives", self.fp) + "\n"
			
 
				+        #summary += self.printEntry("False Positives per Sequence", self.fps) + "\n"
			
 
				+        summary += self.printEntry("False Negatives", self.fn) + "\n"
			
 
				+        #summary += self.printEntry("False Negatives per Sequence", self.fns) + "\n"
			
 
				+        summary += self.printEntry("ID-switches", self.id_switches) + "\n"
			
 
				+        self.fp = self.fp / self.n_gt
			
 
				+        self.fn = self.fn / self.n_gt
			
 
				+        self.id_switches = self.id_switches / self.n_gt
			
 
				+        summary += self.printEntry("False Positives Ratio", self.fp) + "\n"
			
 
				+        #summary += self.printEntry("False Positives per Sequence", self.fps) + "\n"
			
 
				+        summary += self.printEntry("False Negatives Ratio", self.fn) + "\n"
			
 
				+        #summary += self.printEntry("False Negatives per Sequence", self.fns) + "\n"
			
 
				+        summary += self.printEntry("Ignored False Negatives Ratio",
			
 
				+                                   self.ifn) + "\n"
			
 
				+
			
 
				+        #summary += self.printEntry("Ignored False Negatives per Sequence", self.ifns) + "\n"
			
 
				+        summary += self.printEntry("Missed Targets", self.fn) + "\n"
			
 
				+        summary += self.printEntry("ID-switches", self.id_switches) + "\n"
			
 
				+        summary += self.printEntry("Fragmentations", self.fragments) + "\n"
			
 
				+        summary += "\n"
			
 
				+        summary += self.printEntry("Ground Truth Objects (Total)", self.n_gt +
			
 
				+                                   self.n_igt) + "\n"
			
 
				+        #summary += self.printEntry("Ground Truth Objects (Total) per Sequence", self.n_gts) + "\n"
			
 
				+        summary += self.printEntry("Ignored Ground Truth Objects",
			
 
				+                                   self.n_igt) + "\n"
			
 
				+        #summary += self.printEntry("Ignored Ground Truth Objects per Sequence", self.n_igts) + "\n"
			
 
				+        summary += self.printEntry("Ground Truth Trajectories",
			
 
				+                                   self.n_gt_trajectories) + "\n"
			
 
				+        summary += "\n"
			
 
				+        summary += self.printEntry("Tracker Objects (Total)", self.n_tr) + "\n"
			
 
				+        #summary += self.printEntry("Tracker Objects (Total) per Sequence", self.n_trs) + "\n"
			
 
				+        summary += self.printEntry("Ignored Tracker Objects",
			
 
				+                                   self.n_itr) + "\n"
			
 
				+        #summary += self.printEntry("Ignored Tracker Objects per Sequence", self.n_itrs) + "\n"
			
 
				+        summary += self.printEntry("Tracker Trajectories",
			
 
				+                                   self.n_tr_trajectories) + "\n"
			
 
				+        #summary += "\n"
			
 
				+        #summary += self.printEntry("Ignored Tracker Objects with Associated Ignored Ground Truth Objects", self.n_igttr) + "\n"
			
 
				+        summary += "=" * 80
			
 
				+        return summary
			
 
				+
			
 
    def printEntry(self, key, val, width=(70, 10)):
        """Format a single ``key  value`` row of the summary table.

        Args:
            key: row label; left-justified to ``width[0]`` characters.
            val: value to show. Exact ``int`` values are rendered with
                ``%d`` and exact ``float`` values with ``%f``, both padded
                to ``width[1]``; anything else is converted with ``%s``
                and right-justified to ``width[1]``.
            width: ``(label_width, value_width)`` pair.

        Returns:
            str: the formatted row.
        """
        label = key.ljust(width[0])
        value_type = type(val)

        if value_type == int:
            return label + ("%%%dd" % width[1]) % val
        if value_type == float:
            return label + ("%%%df" % (width[1])) % val
        # strings such as "n/a", bools, lists, ... fall through to here
        return label + ("%s" % val).rjust(width[1])
			
 
				+
			
 
				+    def saveToStats(self, save_summary):
			
 
				+        """
			
 
				+            Save the statistics in a whitespace separate file.
			
 
				+        """
			
 
				+        summary = self.createSummary()
			
 
				+        if save_summary:
			
 
				+            filename = os.path.join(self.result_path,
			
 
				+                                    "summary_%s.txt" % self.cls)
			
 
				+            dump = open(filename, "w+")
			
 
				+            dump.write(summary)
			
 
				+            dump.close()
			
 
				+        return summary
			
 
				+
			
 
				+
			
 
				+class KITTIMOTMetric(Metric):
			
 
				+    def __init__(self, save_summary=True):
			
 
				+        self.save_summary = save_summary
			
 
				+        self.MOTEvaluator = KITTIEvaluation
			
 
				+        self.result_root = None
			
 
				+        self.reset()
			
 
				+
			
 
				+    def reset(self):
			
 
				+        self.seqs = []
			
 
				+        self.n_sequences = 0
			
 
				+        self.n_frames = []
			
 
				+        self.strsummary = ''
			
 
				+
			
 
				+    def update(self, data_root, seq, data_type, result_root, result_filename):
			
 
				+        assert data_type == 'kitti', "data_type should 'kitti'"
			
 
				+        self.result_root = result_root
			
 
				+        self.gt_path = data_root
			
 
				+        gt_path = '{}/../labels/{}.txt'.format(data_root, seq)
			
 
				+        gt = open(gt_path, "r")
			
 
				+        max_frame = 0
			
 
				+        for line in gt:
			
 
				+            line = line.strip()
			
 
				+            line_list = line.split(" ")
			
 
				+            if int(line_list[0]) > max_frame:
			
 
				+                max_frame = int(line_list[0])
			
 
				+        rs = open(result_filename, "r")
			
 
				+        for line in rs:
			
 
				+            line = line.strip()
			
 
				+            line_list = line.split(" ")
			
 
				+            if int(line_list[0]) > max_frame:
			
 
				+                max_frame = int(line_list[0])
			
 
				+        gt.close()
			
 
				+        rs.close()
			
 
				+        self.n_frames.append(max_frame + 1)
			
 
				+        self.seqs.append(seq)
			
 
				+        self.n_sequences += 1
			
 
				+
			
 
				+    def accumulate(self):
			
 
				+        logger.info("Processing Result for KITTI Tracking Benchmark")
			
 
				+        e = self.MOTEvaluator(result_path=self.result_root, gt_path=self.gt_path,\
			
 
				+            n_frames=self.n_frames, seqs=self.seqs, n_sequences=self.n_sequences)
			
 
				+        try:
			
 
				+            if not e.loadTracker():
			
 
				+                return
			
 
				+            logger.info("Loading Results - Success")
			
 
				+            logger.info("Evaluate Object Class: %s" % c.upper())
			
 
				+        except:
			
 
				+            logger.info("Caught exception while loading result data.")
			
 
				+        if not e.loadGroundtruth():
			
 
				+            raise ValueError("Ground truth not found.")
			
 
				+        logger.info("Loading Groundtruth - Success")
			
 
				+        # sanity checks
			
 
				+        if len(e.groundtruth) is not len(e.tracker):
			
 
				+            logger.info(
			
 
				+                "The uploaded data does not provide results for every sequence."
			
 
				+            )
			
 
				+            return False
			
 
				+        logger.info("Loaded %d Sequences." % len(e.groundtruth))
			
 
				+        logger.info("Start Evaluation...")
			
 
				+
			
 
				+        if e.compute3rdPartyMetrics():
			
 
				+            self.strsummary = e.saveToStats(self.save_summary)
			
 
				+        else:
			
 
				+            logger.info(
			
 
				+                "There seem to be no true positives or false positives at all in the submitted data."
			
 
				+            )
			
 
				+
			
 
				+    def log(self):
			
 
				+        print(self.strsummary)
			
 
				+
			
 
				+    def get_results(self):
			
 
				+        return self.strsummary
			
--- a/paddlers/models/ppdet/metrics/munkres.py
+++ b/paddlers/models/ppdet/metrics/munkres.py
@@ -0,0 +1,428 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+"""
			
 
				+This code is borrowed from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+
			
 
				+__all__ = ['Munkres', 'make_cost_matrix']
			
 
				+
			
 
				+
			
 
				+class Munkres:
			
 
				+    """
			
 
				+    Calculate the Munkres solution to the classical assignment problem.
			
 
				+    See the module documentation for usage.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        """Create a new instance"""
			
 
				+        self.C = None
			
 
				+        self.row_covered = []
			
 
				+        self.col_covered = []
			
 
				+        self.n = 0
			
 
				+        self.Z0_r = 0
			
 
				+        self.Z0_c = 0
			
 
				+        self.marked = None
			
 
				+        self.path = None
			
 
				+
			
 
    @staticmethod
    def make_cost_matrix(profit_matrix, inversion_function):
        """
        **DEPRECATED**

        Please use the module function ``make_cost_matrix()``.

        This static method only forwards its arguments to that function
        and returns its result.
        """
        # Inside a method body the bare name resolves to the module-level
        # ``make_cost_matrix`` (class attributes are not in scope here), so
        # this is a delegation, not a recursive call. Using the function of
        # this very module avoids importing an unrelated, separately
        # installed ``munkres`` package.
        return make_cost_matrix(profit_matrix, inversion_function)
			
 
				+
			
 
    def pad_matrix(self, matrix, pad_value=0):
        """
        Pad a possibly non-square matrix to make it square.

        Short rows are extended on the right and missing rows are appended
        at the bottom, all filled with ``pad_value``. The input is never
        modified; every row of the result is a new list.

        :Parameters:
            matrix : list of lists
                matrix to pad

            pad_value : int
                value to use to pad the matrix

        :rtype: list of lists
        :return: a new, possibly padded, matrix
        """
        # side length of the square result: the larger of the row count
        # and the longest row
        side = len(matrix)
        for row in matrix:
            side = max(side, len(row))

        padded = [
            list(row) + [pad_value] * (side - len(row)) for row in matrix
        ]
        # append whole rows until the matrix is square
        padded.extend(
            [pad_value] * side for _ in range(side - len(padded)))
        return padded
			
 
				+
			
 
    def compute(self, cost_matrix):
        """
        Compute the indexes for the lowest-cost pairings between rows and
        columns in the matrix. Returns a list of (row, column) tuples
        that can be used to traverse the matrix.

        :Parameters:
            cost_matrix : list of lists
                The cost matrix. If this cost matrix is not square, it
                will be padded with zeros, via a call to ``pad_matrix()``.
                (This method does *not* modify the caller's matrix. It
                operates on a copy of the matrix.)

                An empty matrix yields an empty result.

                **WARNING**: This code handles square and rectangular
                matrices. It does *not* handle irregular matrices.

        :rtype: list
        :return: A list of ``(row, column)`` tuples that describe the lowest
                 cost path through the matrix

        """
        if len(cost_matrix) == 0:
            # nothing to assign; also keeps cost_matrix[0] below from
            # raising IndexError
            return []

        self.C = self.pad_matrix(cost_matrix)
        self.n = len(self.C)
        self.original_length = len(cost_matrix)
        self.original_width = len(cost_matrix[0])
        self.row_covered = [False for i in range(self.n)]
        self.col_covered = [False for i in range(self.n)]
        self.Z0_r = 0
        self.Z0_c = 0
        self.path = self.__make_matrix(self.n * 2, 0)
        self.marked = self.__make_matrix(self.n, 0)

        steps = {
            1: self.__step1,
            2: self.__step2,
            3: self.__step3,
            4: self.__step4,
            5: self.__step5,
            6: self.__step6
        }

        # Each step returns the number of the next step to run. A number
        # without a handler (step 3 returns 7 when finished) ends the
        # loop. The lookup is done explicitly so that a KeyError raised
        # *inside* a step is not mistaken for normal termination.
        step = 1
        while step in steps:
            step = steps[step]()

        # Collect the starred cells (mark == 1) that lie inside the
        # original, unpadded matrix.
        return [(i, j)
                for i in range(self.original_length)
                for j in range(self.original_width)
                if self.marked[i][j] == 1]
			
 
				+
			
 
				+    def __copy_matrix(self, matrix):
			
 
				+        """Return an exact copy of the supplied matrix"""
			
 
				+        return copy.deepcopy(matrix)
			
 
				+
			
 
				+    def __make_matrix(self, n, val):
			
 
				+        """Create an *n*x*n* matrix, populating it with the specific value."""
			
 
				+        matrix = []
			
 
				+        for i in range(n):
			
 
				+            matrix += [[val for j in range(n)]]
			
 
				+        return matrix
			
 
				+
			
 
				+    def __step1(self):
			
 
				+        """
			
 
				+        For each row of the matrix, find the smallest element and
			
 
				+        subtract it from every element in its row. Go to Step 2.
			
 
				+        """
			
 
				+        C = self.C
			
 
				+        n = self.n
			
 
				+        for i in range(n):
			
 
				+            minval = min(self.C[i])
			
 
				+            # Find the minimum value for this row and subtract that minimum
			
 
				+            # from every element in the row.
			
 
				+            for j in range(n):
			
 
				+                self.C[i][j] -= minval
			
 
				+
			
 
				+        return 2
			
 
				+
			
 
    def __step2(self):
        """
        Star an initial set of zeros: scanning the matrix row by row, a
        zero is starred (mark 1) when no zero has been starred in its row
        or column yet. The row/column covers are used as temporary
        "already has a star" flags and are cleared again before moving on
        to Step 3.
        """
        size = self.n
        for r in range(size):
            for c in range(size):
                # skip anything that is not a zero in a still-free line
                if self.C[r][c] != 0 or self.col_covered[c] or \
                        self.row_covered[r]:
                    continue
                self.marked[r][c] = 1
                self.col_covered[c] = True
                self.row_covered[r] = True

        self.__clear_covers()
        return 3
			
 
				+
			
 
				+    def __step3(self):
			
 
				+        """
			
 
				+        Cover each column containing a starred zero. If K columns are
			
 
				+        covered, the starred zeros describe a complete set of unique
			
 
				+        assignments. In this case, Go to DONE, otherwise, Go to Step 4.
			
 
				+        """
			
 
				+        n = self.n
			
 
				+        count = 0
			
 
				+        for i in range(n):
			
 
				+            for j in range(n):
			
 
				+                if self.marked[i][j] == 1:
			
 
				+                    self.col_covered[j] = True
			
 
				+                    count += 1
			
 
				+
			
 
				+        if count >= n:
			
 
				+            step = 7  # done
			
 
				+        else:
			
 
				+            step = 4
			
 
				+
			
 
				+        return step
			
 
				+
			
 
    def __step4(self):
        """
        Repeatedly pick an uncovered zero and prime it (mark 2).

        * If its row has no starred zero, remember the primed zero in
          ``Z0_r`` / ``Z0_c`` and go to Step 5.
        * Otherwise cover the row, uncover the column of that starred
          zero and look for the next uncovered zero.
        * When no uncovered zero is left, go to Step 6.
        """
        while True:
            row, col = self.__find_a_zero()
            if row < 0:
                # no uncovered zero remains
                return 6

            self.marked[row][col] = 2
            star_col = self.__find_star_in_row(row)
            if star_col < 0:
                # primed zero without a star in its row: start of the
                # alternating path built in Step 5
                self.Z0_r = row
                self.Z0_c = col
                return 5

            self.row_covered[row] = True
            self.col_covered[star_col] = False
			
 
				+
			
 
				+    def __step5(self):
			
 
				+        """
			
 
				+        Construct a series of alternating primed and starred zeros as
			
 
				+        follows. Let Z0 represent the uncovered primed zero found in Step 4.
			
 
				+        Let Z1 denote the starred zero in the column of Z0 (if any).
			
 
				+        Let Z2 denote the primed zero in the row of Z1 (there will always
			
 
				+        be one). Continue until the series terminates at a primed zero
			
 
				+        that has no starred zero in its column. Unstar each starred zero
			
 
				+        of the series, star each primed zero of the series, erase all
			
 
				+        primes and uncover every line in the matrix. Return to Step 3
			
 
				+        """
			
 
				+        count = 0
			
 
				+        path = self.path
			
 
				+        path[count][0] = self.Z0_r
			
 
				+        path[count][1] = self.Z0_c
			
 
				+        done = False
			
 
				+        while not done:
			
 
				+            row = self.__find_star_in_col(path[count][1])
			
 
				+            if row >= 0:
			
 
				+                count += 1
			
 
				+                path[count][0] = row
			
 
				+                path[count][1] = path[count - 1][1]
			
 
				+            else:
			
 
				+                done = True
			
 
				+
			
 
				+            if not done:
			
 
				+                col = self.__find_prime_in_row(path[count][0])
			
 
				+                count += 1
			
 
				+                path[count][0] = path[count - 1][0]
			
 
				+                path[count][1] = col
			
 
				+
			
 
				+        self.__convert_path(path, count)
			
 
				+        self.__clear_covers()
			
 
				+        self.__erase_primes()
			
 
				+        return 3
			
 
				+
			
 
				+    def __step6(self):
			
 
				+        """
			
 
				+        Add the value found in Step 4 to every element of each covered
			
 
				+        row, and subtract it from every element of each uncovered column.
			
 
				+        Return to Step 4 without altering any stars, primes, or covered
			
 
				+        lines.
			
 
				+        """
			
 
				+        minval = self.__find_smallest()
			
 
				+        for i in range(self.n):
			
 
				+            for j in range(self.n):
			
 
				+                if self.row_covered[i]:
			
 
				+                    self.C[i][j] += minval
			
 
				+                if not self.col_covered[j]:
			
 
				+                    self.C[i][j] -= minval
			
 
				+        return 4
			
 
				+
			
 
				+    def __find_smallest(self):
			
 
				+        """Find the smallest uncovered value in the matrix."""
			
 
				+        minval = 2e9  # sys.maxint
			
 
				+        for i in range(self.n):
			
 
				+            for j in range(self.n):
			
 
				+                if (not self.row_covered[i]) and (not self.col_covered[j]):
			
 
				+                    if minval > self.C[i][j]:
			
 
				+                        minval = self.C[i][j]
			
 
				+        return minval
			
 
				+
			
 
				+    def __find_a_zero(self):
			
 
				+        """Find the first uncovered element with value 0"""
			
 
				+        row = -1
			
 
				+        col = -1
			
 
				+        i = 0
			
 
				+        n = self.n
			
 
				+        done = False
			
 
				+
			
 
				+        while not done:
			
 
				+            j = 0
			
 
				+            while True:
			
 
				+                if (self.C[i][j] == 0) and \
			
 
				+                   (not self.row_covered[i]) and \
			
 
				+                   (not self.col_covered[j]):
			
 
				+                    row = i
			
 
				+                    col = j
			
 
				+                    done = True
			
 
				+                j += 1
			
 
				+                if j >= n:
			
 
				+                    break
			
 
				+            i += 1
			
 
				+            if i >= n:
			
 
				+                done = True
			
 
				+
			
 
				+        return (row, col)
			
 
				+
			
 
				+    def __find_star_in_row(self, row):
			
 
				+        """
			
 
				+        Find the first starred element in the specified row. Returns
			
 
				+        the column index, or -1 if no starred element was found.
			
 
				+        """
			
 
				+        col = -1
			
 
				+        for j in range(self.n):
			
 
				+            if self.marked[row][j] == 1:
			
 
				+                col = j
			
 
				+                break
			
 
				+
			
 
				+        return col
			
 
				+
			
 
				+    def __find_star_in_col(self, col):
			
 
				+        """
			
 
				+        Find the first starred element in the specified row. Returns
			
 
				+        the row index, or -1 if no starred element was found.
			
 
				+        """
			
 
				+        row = -1
			
 
				+        for i in range(self.n):
			
 
				+            if self.marked[i][col] == 1:
			
 
				+                row = i
			
 
				+                break
			
 
				+
			
 
				+        return row
			
 
				+
			
 
				+    def __find_prime_in_row(self, row):
			
 
				+        """
			
 
				+        Find the first prime element in the specified row. Returns
			
 
				+        the column index, or -1 if no starred element was found.
			
 
				+        """
			
 
				+        col = -1
			
 
				+        for j in range(self.n):
			
 
				+            if self.marked[row][j] == 2:
			
 
				+                col = j
			
 
				+                break
			
 
				+
			
 
				+        return col
			
 
				+
			
 
				+    def __convert_path(self, path, count):
			
 
				+        for i in range(count + 1):
			
 
				+            if self.marked[path[i][0]][path[i][1]] == 1:
			
 
				+                self.marked[path[i][0]][path[i][1]] = 0
			
 
				+            else:
			
 
				+                self.marked[path[i][0]][path[i][1]] = 1
			
 
				+
			
 
				+    def __clear_covers(self):
			
 
				+        """Clear all covered matrix cells"""
			
 
				+        for i in range(self.n):
			
 
				+            self.row_covered[i] = False
			
 
				+            self.col_covered[i] = False
			
 
				+
			
 
				+    def __erase_primes(self):
			
 
				+        """Erase all prime markings"""
			
 
				+        for i in range(self.n):
			
 
				+            for j in range(self.n):
			
 
				+                if self.marked[i][j] == 2:
			
 
				+                    self.marked[i][j] = 0
			
 
				+
			
 
				+
			
 
				+def make_cost_matrix(profit_matrix, inversion_function):
			
 
				+    """
			
 
				+    Create a cost matrix from a profit matrix by calling
			
 
				+    'inversion_function' to invert each value. The inversion
			
 
				+    function must take one numeric argument (of any type) and return
			
 
				+    another numeric argument which is presumed to be the cost inverse
			
 
				+    of the original profit.
			
 
				+
			
 
				+    This is a static method. Call it like this:
			
 
				+
			
 
				+    .. python::
			
 
				+
			
 
				+        cost_matrix = Munkres.make_cost_matrix(matrix, inversion_func)
			
 
				+
			
 
				+    For example:
			
 
				+
			
 
				+    .. python::
			
 
				+
			
 
				+        cost_matrix = Munkres.make_cost_matrix(matrix, lambda x : sys.maxint - x)
			
 
				+
			
 
				+    :Parameters:
			
 
				+        profit_matrix : list of lists
			
 
				+            The matrix to convert from a profit to a cost matrix
			
 
				+
			
 
				+        inversion_function : function
			
 
				+            The function to use to invert each entry in the profit matrix
			
 
				+
			
 
				+    :rtype: list of lists
			
 
				+    :return: The converted matrix
			
 
				+    """
			
 
				+    cost_matrix = []
			
 
				+    for row in profit_matrix:
			
 
				+        cost_matrix.append([inversion_function(value) for value in row])
			
 
				+    return cost_matrix
			
--- a/paddlers/models/ppdet/metrics/widerface_utils.py
+++ b/paddlers/models/ppdet/metrics/widerface_utils.py
@@ -0,0 +1,393 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import os
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from collections import OrderedDict
			
 
				+
			
 
				+import paddle
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = ['face_eval_run', 'lmk2out']
			
 
				+
			
 
				+
			
 
				+def face_eval_run(model,
			
 
				+                  image_dir,
			
 
				+                  gt_file,
			
 
				+                  pred_dir='output/pred',
			
 
				+                  eval_mode='widerface',
			
 
				+                  multi_scale=False):
			
 
				+    # load ground truth files
			
 
				+    with open(gt_file, 'r') as f:
			
 
				+        gt_lines = f.readlines()
			
 
				+    imid2path = []
			
 
				+    pos_gt = 0
			
 
				+    while pos_gt < len(gt_lines):
			
 
				+        name_gt = gt_lines[pos_gt].strip('\n\t').split()[0]
			
 
				+        imid2path.append(name_gt)
			
 
				+        pos_gt += 1
			
 
				+        n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0])
			
 
				+        pos_gt += 1 + n_gt
			
 
				+    logger.info('The ground truth file load {} images'.format(len(imid2path)))
			
 
				+
			
 
				+    dets_dist = OrderedDict()
			
 
				+    for iter_id, im_path in enumerate(imid2path):
			
 
				+        image_path = os.path.join(image_dir, im_path)
			
 
				+        if eval_mode == 'fddb':
			
 
				+            image_path += '.jpg'
			
 
				+        assert os.path.exists(image_path)
			
 
				+        image = cv2.imread(image_path)
			
 
				+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
			
 
				+        if multi_scale:
			
 
				+            shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
			
 
				+            det0 = detect_face(model, image, shrink)
			
 
				+            det1 = flip_test(model, image, shrink)
			
 
				+            [det2, det3] = multi_scale_test(model, image, max_shrink)
			
 
				+            det4 = multi_scale_test_pyramid(model, image, max_shrink)
			
 
				+            det = np.row_stack((det0, det1, det2, det3, det4))
			
 
				+            dets = bbox_vote(det)
			
 
				+        else:
			
 
				+            dets = detect_face(model, image, 1)
			
 
				+        if eval_mode == 'widerface':
			
 
				+            save_widerface_bboxes(image_path, dets, pred_dir)
			
 
				+        else:
			
 
				+            dets_dist[im_path] = dets
			
 
				+        if iter_id % 100 == 0:
			
 
				+            logger.info('Test iter {}'.format(iter_id))
			
 
				+    if eval_mode == 'fddb':
			
 
				+        save_fddb_bboxes(dets_dist, pred_dir)
			
 
				+    logger.info("Finish evaluation.")
			
 
				+
			
 
				+
			
 
				+def detect_face(model, image, shrink):
			
 
				+    image_shape = [image.shape[0], image.shape[1]]
			
 
				+    if shrink != 1:
			
 
				+        h, w = int(image_shape[0] * shrink), int(image_shape[1] * shrink)
			
 
				+        image = cv2.resize(image, (w, h))
			
 
				+        image_shape = [h, w]
			
 
				+
			
 
				+    img = face_img_process(image)
			
 
				+    image_shape = np.asarray([image_shape])
			
 
				+    scale_factor = np.asarray([[shrink, shrink]])
			
 
				+    data = {
			
 
				+        "image": paddle.to_tensor(
			
 
				+            img, dtype='float32'),
			
 
				+        "im_shape": paddle.to_tensor(
			
 
				+            image_shape, dtype='float32'),
			
 
				+        "scale_factor": paddle.to_tensor(
			
 
				+            scale_factor, dtype='float32')
			
 
				+    }
			
 
				+    model.eval()
			
 
				+    detection = model(data)
			
 
				+    detection = detection['bbox'].numpy()
			
 
				+    # layout: xmin, ymin, xmax. ymax, score
			
 
				+    if np.prod(detection.shape) == 1:
			
 
				+        logger.info("No face detected")
			
 
				+        return np.array([[0, 0, 0, 0, 0]])
			
 
				+    det_conf = detection[:, 1]
			
 
				+    det_xmin = detection[:, 2]
			
 
				+    det_ymin = detection[:, 3]
			
 
				+    det_xmax = detection[:, 4]
			
 
				+    det_ymax = detection[:, 5]
			
 
				+
			
 
				+    det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
			
 
				+    return det
			
 
				+
			
 
				+
			
 
				+def flip_test(model, image, shrink):
			
 
				+    img = cv2.flip(image, 1)
			
 
				+    det_f = detect_face(model, img, shrink)
			
 
				+    det_t = np.zeros(det_f.shape)
			
 
				+    img_width = image.shape[1]
			
 
				+    det_t[:, 0] = img_width - det_f[:, 2]
			
 
				+    det_t[:, 1] = det_f[:, 1]
			
 
				+    det_t[:, 2] = img_width - det_f[:, 0]
			
 
				+    det_t[:, 3] = det_f[:, 3]
			
 
				+    det_t[:, 4] = det_f[:, 4]
			
 
				+    return det_t
			
 
				+
			
 
				+
			
 
				+def multi_scale_test(model, image, max_shrink):
			
 
				+    # Shrink detecting is only used to detect big faces
			
 
				+    st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
			
 
				+    det_s = detect_face(model, image, st)
			
 
				+    index = np.where(
			
 
				+        np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
			
 
				+                   det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
			
 
				+    det_s = det_s[index, :]
			
 
				+    # Enlarge one times
			
 
				+    bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2
			
 
				+    det_b = detect_face(model, image, bt)
			
 
				+
			
 
				+    # Enlarge small image x times for small faces
			
 
				+    if max_shrink > 2:
			
 
				+        bt *= 2
			
 
				+        while bt < max_shrink:
			
 
				+            det_b = np.row_stack((det_b, detect_face(model, image, bt)))
			
 
				+            bt *= 2
			
 
				+        det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))
			
 
				+
			
 
				+    # Enlarged images are only used to detect small faces.
			
 
				+    if bt > 1:
			
 
				+        index = np.where(
			
 
				+            np.minimum(det_b[:, 2] - det_b[:, 0] + 1,
			
 
				+                       det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
			
 
				+        det_b = det_b[index, :]
			
 
				+    # Shrinked images are only used to detect big faces.
			
 
				+    else:
			
 
				+        index = np.where(
			
 
				+            np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
			
 
				+                       det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
			
 
				+        det_b = det_b[index, :]
			
 
				+    return det_s, det_b
			
 
				+
			
 
				+
			
 
				+def multi_scale_test_pyramid(model, image, max_shrink):
			
 
				+    # Use image pyramids to detect faces
			
 
				+    det_b = detect_face(model, image, 0.25)
			
 
				+    index = np.where(
			
 
				+        np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
			
 
				+                   det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
			
 
				+    det_b = det_b[index, :]
			
 
				+
			
 
				+    st = [0.75, 1.25, 1.5, 1.75]
			
 
				+    for i in range(len(st)):
			
 
				+        if st[i] <= max_shrink:
			
 
				+            det_temp = detect_face(model, image, st[i])
			
 
				+            # Enlarged images are only used to detect small faces.
			
 
				+            if st[i] > 1:
			
 
				+                index = np.where(
			
 
				+                    np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1,
			
 
				+                               det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0]
			
 
				+                det_temp = det_temp[index, :]
			
 
				+            # Shrinked images are only used to detect big faces.
			
 
				+            else:
			
 
				+                index = np.where(
			
 
				+                    np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1,
			
 
				+                               det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0]
			
 
				+                det_temp = det_temp[index, :]
			
 
				+            det_b = np.row_stack((det_b, det_temp))
			
 
				+    return det_b
			
 
				+
			
 
				+
			
 
				+def to_chw(image):
			
 
				+    """
			
 
				+    Transpose image from HWC to CHW.
			
 
				+    Args:
			
 
				+        image (np.array): an image with HWC layout.
			
 
				+    """
			
 
				+    # HWC to CHW
			
 
				+    if len(image.shape) == 3:
			
 
				+        image = np.swapaxes(image, 1, 2)
			
 
				+        image = np.swapaxes(image, 1, 0)
			
 
				+    return image
			
 
				+
			
 
				+
			
 
				+def face_img_process(image,
			
 
				+                     mean=[104., 117., 123.],
			
 
				+                     std=[127.502231, 127.502231, 127.502231]):
			
 
				+    img = np.array(image)
			
 
				+    img = to_chw(img)
			
 
				+    img = img.astype('float32')
			
 
				+    img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
			
 
				+    img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
			
 
				+    img = [img]
			
 
				+    img = np.array(img)
			
 
				+    return img
			
 
				+
			
 
				+
			
 
				+def get_shrink(height, width):
			
 
				+    """
			
 
				+    Args:
			
 
				+        height (int): image height.
			
 
				+        width (int): image width.
			
 
				+    """
			
 
				+    # avoid out of memory
			
 
				+    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
			
 
				+    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
			
 
				+
			
 
				+    def get_round(x, loc):
			
 
				+        str_x = str(x)
			
 
				+        if '.' in str_x:
			
 
				+            str_before, str_after = str_x.split('.')
			
 
				+            len_after = len(str_after)
			
 
				+            if len_after >= 3:
			
 
				+                str_final = str_before + '.' + str_after[0:loc]
			
 
				+                return float(str_final)
			
 
				+            else:
			
 
				+                return x
			
 
				+
			
 
				+    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
			
 
				+    if max_shrink >= 1.5 and max_shrink < 2:
			
 
				+        max_shrink = max_shrink - 0.1
			
 
				+    elif max_shrink >= 2 and max_shrink < 3:
			
 
				+        max_shrink = max_shrink - 0.2
			
 
				+    elif max_shrink >= 3 and max_shrink < 4:
			
 
				+        max_shrink = max_shrink - 0.3
			
 
				+    elif max_shrink >= 4 and max_shrink < 5:
			
 
				+        max_shrink = max_shrink - 0.4
			
 
				+    elif max_shrink >= 5:
			
 
				+        max_shrink = max_shrink - 0.5
			
 
				+    elif max_shrink <= 0.1:
			
 
				+        max_shrink = 0.1
			
 
				+
			
 
				+    shrink = max_shrink if max_shrink < 1 else 1
			
 
				+    return shrink, max_shrink
			
 
				+
			
 
				+
			
 
				+def bbox_vote(det):
			
 
				+    order = det[:, 4].ravel().argsort()[::-1]
			
 
				+    det = det[order, :]
			
 
				+    if det.shape[0] == 0:
			
 
				+        dets = np.array([[10, 10, 20, 20, 0.002]])
			
 
				+        det = np.empty(shape=[0, 5])
			
 
				+    while det.shape[0] > 0:
			
 
				+        # IOU
			
 
				+        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
			
 
				+        xx1 = np.maximum(det[0, 0], det[:, 0])
			
 
				+        yy1 = np.maximum(det[0, 1], det[:, 1])
			
 
				+        xx2 = np.minimum(det[0, 2], det[:, 2])
			
 
				+        yy2 = np.minimum(det[0, 3], det[:, 3])
			
 
				+        w = np.maximum(0.0, xx2 - xx1 + 1)
			
 
				+        h = np.maximum(0.0, yy2 - yy1 + 1)
			
 
				+        inter = w * h
			
 
				+        o = inter / (area[0] + area[:] - inter)
			
 
				+
			
 
				+        # nms
			
 
				+        merge_index = np.where(o >= 0.3)[0]
			
 
				+        det_accu = det[merge_index, :]
			
 
				+        det = np.delete(det, merge_index, 0)
			
 
				+        if merge_index.shape[0] <= 1:
			
 
				+            if det.shape[0] == 0:
			
 
				+                try:
			
 
				+                    dets = np.row_stack((dets, det_accu))
			
 
				+                except:
			
 
				+                    dets = det_accu
			
 
				+            continue
			
 
				+        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
			
 
				+        max_score = np.max(det_accu[:, 4])
			
 
				+        det_accu_sum = np.zeros((1, 5))
			
 
				+        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
			
 
				+                                      axis=0) / np.sum(det_accu[:, -1:])
			
 
				+        det_accu_sum[:, 4] = max_score
			
 
				+        try:
			
 
				+            dets = np.row_stack((dets, det_accu_sum))
			
 
				+        except:
			
 
				+            dets = det_accu_sum
			
 
				+    dets = dets[0:750, :]
			
 
				+    keep_index = np.where(dets[:, 4] >= 0.01)[0]
			
 
				+    dets = dets[keep_index, :]
			
 
				+    return dets
			
 
				+
			
 
				+
			
 
				+def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
			
 
				+    image_name = image_path.split('/')[-1]
			
 
				+    image_class = image_path.split('/')[-2]
			
 
				+    odir = os.path.join(output_dir, image_class)
			
 
				+    if not os.path.exists(odir):
			
 
				+        os.makedirs(odir)
			
 
				+
			
 
				+    ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
			
 
				+    f = open(ofname, 'w')
			
 
				+    f.write('{:s}\n'.format(image_class + '/' + image_name))
			
 
				+    f.write('{:d}\n'.format(bboxes_scores.shape[0]))
			
 
				+    for box_score in bboxes_scores:
			
 
				+        xmin, ymin, xmax, ymax, score = box_score
			
 
				+        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
			
 
				+            xmax - xmin + 1), (ymax - ymin + 1), score))
			
 
				+    f.close()
			
 
				+    logger.info("The predicted result is saved as {}".format(ofname))
			
 
				+
			
 
				+
			
 
				+def save_fddb_bboxes(bboxes_scores,
			
 
				+                     output_dir,
			
 
				+                     output_fname='pred_fddb_res.txt'):
			
 
				+    if not os.path.exists(output_dir):
			
 
				+        os.makedirs(output_dir)
			
 
				+    predict_file = os.path.join(output_dir, output_fname)
			
 
				+    f = open(predict_file, 'w')
			
 
				+    for image_path, dets in bboxes_scores.iteritems():
			
 
				+        f.write('{:s}\n'.format(image_path))
			
 
				+        f.write('{:d}\n'.format(dets.shape[0]))
			
 
				+        for box_score in dets:
			
 
				+            xmin, ymin, xmax, ymax, score = box_score
			
 
				+            width, height = xmax - xmin, ymax - ymin
			
 
				+            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
			
 
				+                    .format(xmin, ymin, width, height, score))
			
 
				+    logger.info("The predicted result is saved as {}".format(predict_file))
			
 
				+    return predict_file
			
 
				+
			
 
				+
			
 
				+def lmk2out(results, is_bbox_normalized=False):
			
 
				+    """
			
 
				+    Args:
			
 
				+        results: request a dict, should include: `landmark`, `im_id`,
			
 
				+                 if is_bbox_normalized=True, also need `im_shape`.
			
 
				+        is_bbox_normalized: whether or not landmark is normalized.
			
 
				+    """
			
 
				+    xywh_res = []
			
 
				+    for t in results:
			
 
				+        bboxes = t['bbox'][0]
			
 
				+        lengths = t['bbox'][1][0]
			
 
				+        im_ids = np.array(t['im_id'][0]).flatten()
			
 
				+        if bboxes.shape == (1, 1) or bboxes is None:
			
 
				+            continue
			
 
				+        face_index = t['face_index'][0]
			
 
				+        prior_box = t['prior_boxes'][0]
			
 
				+        predict_lmk = t['landmark'][0]
			
 
				+        prior = np.reshape(prior_box, (-1, 4))
			
 
				+        predictlmk = np.reshape(predict_lmk, (-1, 10))
			
 
				+
			
 
				+        k = 0
			
 
				+        for a in range(len(lengths)):
			
 
				+            num = lengths[a]
			
 
				+            im_id = int(im_ids[a])
			
 
				+            for i in range(num):
			
 
				+                score = bboxes[k][1]
			
 
				+                theindex = face_index[i][0]
			
 
				+                me_prior = prior[theindex, :]
			
 
				+                lmk_pred = predictlmk[theindex, :]
			
 
				+                prior_w = me_prior[2] - me_prior[0]
			
 
				+                prior_h = me_prior[3] - me_prior[1]
			
 
				+                prior_w_center = (me_prior[2] + me_prior[0]) / 2
			
 
				+                prior_h_center = (me_prior[3] + me_prior[1]) / 2
			
 
				+                lmk_decode = np.zeros((10))
			
 
				+                for j in [0, 2, 4, 6, 8]:
			
 
				+                    lmk_decode[j] = lmk_pred[
			
 
				+                        j] * 0.1 * prior_w + prior_w_center
			
 
				+                for j in [1, 3, 5, 7, 9]:
			
 
				+                    lmk_decode[j] = lmk_pred[
			
 
				+                        j] * 0.1 * prior_h + prior_h_center
			
 
				+                im_shape = t['im_shape'][0][a].tolist()
			
 
				+                image_h, image_w = int(im_shape[0]), int(im_shape[1])
			
 
				+                if is_bbox_normalized:
			
 
				+                    lmk_decode = lmk_decode * np.array([
			
 
				+                        image_w, image_h, image_w, image_h, image_w, image_h,
			
 
				+                        image_w, image_h, image_w, image_h
			
 
				+                    ])
			
 
				+                lmk_res = {
			
 
				+                    'image_id': im_id,
			
 
				+                    'landmark': lmk_decode,
			
 
				+                    'score': score,
			
 
				+                }
			
 
				+                xywh_res.append(lmk_res)
			
 
				+                k += 1
			
 
				+    return xywh_res
			
--- a/paddlers/models/ppdet/model_zoo/__init__.py
+++ b/paddlers/models/ppdet/model_zoo/__init__.py
@@ -0,0 +1,18 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import model_zoo
			
 
				+from .model_zoo import *
			
 
				+
			
 
				+__all__ = model_zoo.__all__
			
--- a/paddlers/models/ppdet/model_zoo/model_zoo.py
+++ b/paddlers/models/ppdet/model_zoo/model_zoo.py
@@ -0,0 +1,84 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import os.path as osp
			
 
				+import pkg_resources
			
 
				+
			
 
				+try:
			
 
				+    from collections.abc import Sequence
			
 
				+except:
			
 
				+    from collections import Sequence
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import load_config, create
			
 
				+from paddlers.models.ppdet.utils.checkpoint import load_weight
			
 
				+from paddlers.models.ppdet.utils.download import get_config_path
			
 
				+
			
 
				+from paddlers.models.ppdet.utils.logger import setup_logger
			
 
				+logger = setup_logger(__name__)
			
 
				+
			
 
				+__all__ = [
			
 
				+    'list_model', 'get_config_file', 'get_weights_url', 'get_model',
			
 
				+    'MODEL_ZOO_FILENAME'
			
 
				+]
			
 
				+
			
 
				+MODEL_ZOO_FILENAME = 'MODEL_ZOO'
			
 
				+
			
 
				+
			
 
				+def list_model(filters=[]):
			
 
				+    model_zoo_file = pkg_resources.resource_filename('ppdet.model_zoo',
			
 
				+                                                     MODEL_ZOO_FILENAME)
			
 
				+    with open(model_zoo_file) as f:
			
 
				+        model_names = f.read().splitlines()
			
 
				+
			
 
				+    # filter model_name
			
 
				+    def filt(name):
			
 
				+        for f in filters:
			
 
				+            if name.find(f) < 0:
			
 
				+                return False
			
 
				+        return True
			
 
				+
			
 
				+    if isinstance(filters, str) or not isinstance(filters, Sequence):
			
 
				+        filters = [filters]
			
 
				+    model_names = [name for name in model_names if filt(name)]
			
 
				+    if len(model_names) == 0 and len(filters) > 0:
			
 
				+        raise ValueError("no model found, please check filters seeting, "
			
 
				+                         "filters can be set as following kinds:\n"
			
 
				+                         "\tDataset: coco, voc ...\n"
			
 
				+                         "\tArchitecture: yolo, rcnn, ssd ...\n"
			
 
				+                         "\tBackbone: resnet, vgg, darknet ...\n")
			
 
				+
			
 
				+    model_str = "Available Models:\n"
			
 
				+    for model_name in model_names:
			
 
				+        model_str += "\t{}\n".format(model_name)
			
 
				+    logger.info(model_str)
			
 
				+
			
 
				+
			
 
				+# models and configs save on bcebos under dygraph directory
			
 
				+def get_config_file(model_name):
			
 
				+    return get_config_path("ppdet://configs/{}.yml".format(model_name))
			
 
				+
			
 
				+
			
 
				+def get_weights_url(model_name):
			
 
				+    return "ppdet://models/{}.pdparams".format(osp.split(model_name)[-1])
			
 
				+
			
 
				+
			
 
				+def get_model(model_name, pretrained=True):
			
 
				+    cfg_file = get_config_file(model_name)
			
 
				+    cfg = load_config(cfg_file)
			
 
				+    model = create(cfg.architecture)
			
 
				+
			
 
				+    if pretrained:
			
 
				+        load_weight(model, get_weights_url(model_name))
			
 
				+
			
 
				+    return model
			
--- a/paddlers/models/ppdet/modeling/__init__.py
+++ b/paddlers/models/ppdet/modeling/__init__.py
@@ -0,0 +1,45 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import warnings
			
 
				+warnings.filterwarnings(
			
 
				+    action='ignore', category=DeprecationWarning, module='ops')
			
 
				+
			
 
				+from . import ops
			
 
				+from . import backbones
			
 
				+from . import necks
			
 
				+from . import proposal_generator
			
 
				+from . import heads
			
 
				+from . import losses
			
 
				+from . import architectures
			
 
				+from . import post_process
			
 
				+from . import layers
			
 
				+from . import reid
			
 
				+from . import mot
			
 
				+from . import transformers
			
 
				+from . import assigners
			
 
				+
			
 
				+from .ops import *
			
 
				+from .backbones import *
			
 
				+from .necks import *
			
 
				+from .proposal_generator import *
			
 
				+from .heads import *
			
 
				+from .losses import *
			
 
				+from .architectures import *
			
 
				+from .post_process import *
			
 
				+from .layers import *
			
 
				+from .reid import *
			
 
				+from .mot import *
			
 
				+from .transformers import *
			
 
				+from .assigners import *
			
--- a/paddlers/models/ppdet/modeling/architectures/__init__.py
+++ b/paddlers/models/ppdet/modeling/architectures/__init__.py
@@ -0,0 +1,51 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+from . import meta_arch
			
 
				+from . import faster_rcnn
			
 
				+from . import mask_rcnn
			
 
				+from . import yolo
			
 
				+from . import cascade_rcnn
			
 
				+from . import ssd
			
 
				+from . import fcos
			
 
				+from . import solov2
			
 
				+from . import ttfnet
			
 
				+from . import s2anet
			
 
				+from . import keypoint_hrhrnet
			
 
				+from . import keypoint_hrnet
			
 
				+from . import jde
			
 
				+from . import deepsort
			
 
				+from . import fairmot
			
 
				+from . import centernet
			
 
				+from . import gfl
			
 
				+from . import picodet
			
 
				+from . import detr
			
 
				+from . import sparse_rcnn
			
 
				+from . import tood
			
 
				+
			
 
				+from .meta_arch import *
			
 
				+from .faster_rcnn import *
			
 
				+from .mask_rcnn import *
			
 
				+from .yolo import *
			
 
				+from .cascade_rcnn import *
			
 
				+from .ssd import *
			
 
				+from .fcos import *
			
 
				+from .solov2 import *
			
 
				+from .ttfnet import *
			
 
				+from .s2anet import *
			
 
				+from .keypoint_hrhrnet import *
			
 
				+from .keypoint_hrnet import *
			
 
				+from .jde import *
			
 
				+from .deepsort import *
			
 
				+from .fairmot import *
			
 
				+from .centernet import *
			
 
				+from .blazeface import *
			
 
				+from .gfl import *
			
 
				+from .picodet import *
			
 
				+from .detr import *
			
 
				+from .sparse_rcnn import *
			
 
				+from .tood import *
			
--- a/paddlers/models/ppdet/modeling/architectures/blazeface.py
+++ b/paddlers/models/ppdet/modeling/architectures/blazeface.py
@@ -0,0 +1,91 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['BlazeFace']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class BlazeFace(BaseArch):
			
 
				+    """
			
 
				+    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
			
 
				+               see https://arxiv.org/abs/1907.05047
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (nn.Layer): backbone instance
			
 
				+        neck (nn.Layer): neck instance
			
 
				+        blaze_head (nn.Layer): `blazeHead` instance
			
 
				+        post_process (object): `BBoxPostProcess` instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['post_process']
			
 
				+
			
 
				+    def __init__(self, backbone, blaze_head, neck, post_process):
			
 
				+        super(BlazeFace, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.blaze_head = blaze_head
			
 
				+        self.post_process = post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        # fpn
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+        # head
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        blaze_head = create(cfg['blaze_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            'blaze_head': blaze_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        # Backbone
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        # neck
			
 
				+        neck_feats = self.neck(body_feats)
			
 
				+        # blaze Head
			
 
				+        if self.training:
			
 
				+            return self.blaze_head(neck_feats, self.inputs['image'],
			
 
				+                                   self.inputs['gt_bbox'],
			
 
				+                                   self.inputs['gt_class'])
			
 
				+        else:
			
 
				+            preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
			
 
				+            bbox, bbox_num = self.post_process(preds, anchors,
			
 
				+                                               self.inputs['im_shape'],
			
 
				+                                               self.inputs['scale_factor'])
			
 
				+            return bbox, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        return {"loss": self._forward()}
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {
			
 
				+            "bbox": bbox_pred,
			
 
				+            "bbox_num": bbox_num,
			
 
				+        }
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/cascade_rcnn.py
@@ -0,0 +1,144 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['CascadeRCNN']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class CascadeRCNN(BaseArch):
			
 
				+    """
			
 
				+    Cascade R-CNN network, see https://arxiv.org/abs/1712.00726
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        rpn_head (object): `RPNHead` instance
			
 
				+        bbox_head (object): `BBoxHead` instance
			
 
				+        bbox_post_process (object): `BBoxPostProcess` instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+        mask_head (object): `MaskHead` instance
			
 
				+        mask_post_process (object): `MaskPostProcess` instance
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = [
			
 
				+        'bbox_post_process',
			
 
				+        'mask_post_process',
			
 
				+    ]
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 rpn_head,
			
 
				+                 bbox_head,
			
 
				+                 bbox_post_process,
			
 
				+                 neck=None,
			
 
				+                 mask_head=None,
			
 
				+                 mask_post_process=None):
			
 
				+        super(CascadeRCNN, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.rpn_head = rpn_head
			
 
				+        self.bbox_head = bbox_head
			
 
				+        self.bbox_post_process = bbox_post_process
			
 
				+        self.neck = neck
			
 
				+        self.mask_head = mask_head
			
 
				+        self.mask_post_process = mask_post_process
			
 
				+        self.with_mask = mask_head is not None
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        rpn_head = create(cfg['rpn_head'], **kwargs)
			
 
				+        bbox_head = create(cfg['bbox_head'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and out_shape or bbox_head.get_head().out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "rpn_head": rpn_head,
			
 
				+            "bbox_head": bbox_head,
			
 
				+            "mask_head": mask_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        if self.neck is not None:
			
 
				+            body_feats = self.neck(body_feats)
			
 
				+
			
 
				+        if self.training:
			
 
				+            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
			
 
				+            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
			
 
				+                                                  self.inputs)
			
 
				+            rois, rois_num = self.bbox_head.get_assigned_rois()
			
 
				+            bbox_targets = self.bbox_head.get_assigned_targets()
			
 
				+            if self.with_mask:
			
 
				+                mask_loss = self.mask_head(body_feats, rois, rois_num,
			
 
				+                                           self.inputs, bbox_targets,
			
 
				+                                           bbox_feat)
			
 
				+                return rpn_loss, bbox_loss, mask_loss
			
 
				+            else:
			
 
				+                return rpn_loss, bbox_loss, {}
			
 
				+        else:
			
 
				+            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
			
 
				+            preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
			
 
				+            refined_rois = self.bbox_head.get_refined_rois()
			
 
				+
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+
			
 
				+            bbox, bbox_num = self.bbox_post_process(
			
 
				+                preds, (refined_rois, rois_num), im_shape, scale_factor)
			
 
				+            # rescale the prediction back to origin image
			
 
				+            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				+                                                        im_shape, scale_factor)
			
 
				+            if not self.with_mask:
			
 
				+                return bbox_pred, bbox_num, None
			
 
				+            mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
			
 
				+            origin_shape = self.bbox_post_process.get_origin_shape()
			
 
				+            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
			
 
				+                                               bbox_num, origin_shape)
			
 
				+            return bbox_pred, bbox_num, mask_pred
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        rpn_loss, bbox_loss, mask_loss = self._forward()
			
 
				+        loss = {}
			
 
				+        loss.update(rpn_loss)
			
 
				+        loss.update(bbox_loss)
			
 
				+        if self.with_mask:
			
 
				+            loss.update(mask_loss)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num, mask_pred = self._forward()
			
 
				+        output = {
			
 
				+            'bbox': bbox_pred,
			
 
				+            'bbox_num': bbox_num,
			
 
				+        }
			
 
				+        if self.with_mask:
			
 
				+            output.update({'mask': mask_pred})
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/centernet.py
+++ b/paddlers/models/ppdet/modeling/architectures/centernet.py
@@ -0,0 +1,108 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['CenterNet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class CenterNet(BaseArch):
			
 
				+    """
			
 
				+    CenterNet network, see http://arxiv.org/abs/1904.07850
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        neck (object): FPN instance, default use 'CenterNetDLAFPN'
			
 
				+        head (object): 'CenterNetHead' instance
			
 
				+        post_process (object): 'CenterNetPostProcess' instance
			
 
				+        for_mot (bool): whether return other features used in tracking model
			
 
				+
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['post_process']
			
 
				+    __shared__ = ['for_mot']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 neck='CenterNetDLAFPN',
			
 
				+                 head='CenterNetHead',
			
 
				+                 post_process='CenterNetPostProcess',
			
 
				+                 for_mot=False):
			
 
				+        super(CenterNet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+        self.post_process = post_process
			
 
				+        self.for_mot = for_mot
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {'backbone': backbone, 'neck': neck, "head": head}
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        neck_feat = self.backbone(self.inputs)
			
 
				+        if self.neck is not None:
			
 
				+            neck_feat = self.neck(neck_feat)
			
 
				+        head_out = self.head(neck_feat, self.inputs)
			
 
				+        if self.for_mot:
			
 
				+            head_out.update({'neck_feat': neck_feat})
			
 
				+        elif self.training:
			
 
				+            head_out['loss'] = head_out.pop('det_loss')
			
 
				+        return head_out
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        head_out = self._forward()
			
 
				+        if self.for_mot:
			
 
				+            bbox, bbox_inds, topk_clses = self.post_process(
			
 
				+                head_out['heatmap'],
			
 
				+                head_out['size'],
			
 
				+                head_out['offset'],
			
 
				+                im_shape=self.inputs['im_shape'],
			
 
				+                scale_factor=self.inputs['scale_factor'])
			
 
				+            output = {
			
 
				+                "bbox": bbox,
			
 
				+                "bbox_inds": bbox_inds,
			
 
				+                "topk_clses": topk_clses,
			
 
				+                "neck_feat": head_out['neck_feat']
			
 
				+            }
			
 
				+        else:
			
 
				+            bbox, bbox_num, _ = self.post_process(
			
 
				+                head_out['heatmap'],
			
 
				+                head_out['size'],
			
 
				+                head_out['offset'],
			
 
				+                im_shape=self.inputs['im_shape'],
			
 
				+                scale_factor=self.inputs['scale_factor'])
			
 
				+            output = {
			
 
				+                "bbox": bbox,
			
 
				+                "bbox_num": bbox_num,
			
 
				+            }
			
 
				+        return output
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/deepsort.py
+++ b/paddlers/models/ppdet/modeling/architectures/deepsort.py
@@ -0,0 +1,69 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+from paddlers.models.ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
			
 
				+
			
 
				+__all__ = ['DeepSORT']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class DeepSORT(BaseArch):
			
 
				+    """
			
 
				+    DeepSORT network, see https://arxiv.org/abs/1703.07402
			
 
				+
			
 
				+    Args:
			
 
				+        detector (object): detector model instance
			
 
				+        reid (object): reid model instance
			
 
				+        tracker (object): tracker instance
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 detector='YOLOv3',
			
 
				+                 reid='PCBPyramid',
			
 
				+                 tracker='DeepSORTTracker'):
			
 
				+        super(DeepSORT, self).__init__()
			
 
				+        self.detector = detector
			
 
				+        self.reid = reid
			
 
				+        self.tracker = tracker
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        if cfg['detector'] != 'None':
			
 
				+            detector = create(cfg['detector'])
			
 
				+        else:
			
 
				+            detector = None
			
 
				+        reid = create(cfg['reid'])
			
 
				+        tracker = create(cfg['tracker'])
			
 
				+
			
 
				+        return {
			
 
				+            "detector": detector,
			
 
				+            "reid": reid,
			
 
				+            "tracker": tracker,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        crops = self.inputs['crops']
			
 
				+        features = self.reid(crops)
			
 
				+        return features
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/detr.py
+++ b/paddlers/models/ppdet/modeling/architectures/detr.py
@@ -0,0 +1,93 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from .meta_arch import BaseArch
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+
			
 
				+__all__ = ['DETR']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class DETR(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['post_process']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 transformer,
			
 
				+                 detr_head,
			
 
				+                 post_process='DETRBBoxPostProcess'):
			
 
				+        super(DETR, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.transformer = transformer
			
 
				+        self.detr_head = detr_head
			
 
				+        self.post_process = post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        # transformer
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        transformer = create(cfg['transformer'], **kwargs)
			
 
				+        # head
			
 
				+        kwargs = {
			
 
				+            'hidden_dim': transformer.hidden_dim,
			
 
				+            'nhead': transformer.nhead,
			
 
				+            'input_shape': backbone.out_shape
			
 
				+        }
			
 
				+        detr_head = create(cfg['detr_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'transformer': transformer,
			
 
				+            "detr_head": detr_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        # Backbone
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+
			
 
				+        # Transformer
			
 
				+        out_transformer = self.transformer(body_feats, self.inputs['pad_mask'])
			
 
				+
			
 
				+        # DETR Head
			
 
				+        if self.training:
			
 
				+            return self.detr_head(out_transformer, body_feats, self.inputs)
			
 
				+        else:
			
 
				+            preds = self.detr_head(out_transformer, body_feats)
			
 
				+            bbox, bbox_num = self.post_process(preds, self.inputs['im_shape'],
			
 
				+                                               self.inputs['scale_factor'])
			
 
				+            return bbox, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        losses = self._forward()
			
 
				+        losses.update({
			
 
				+            'loss':
			
 
				+            paddle.add_n([v for k, v in losses.items() if 'log' not in k])
			
 
				+        })
			
 
				+        return losses
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {
			
 
				+            "bbox": bbox_pred,
			
 
				+            "bbox_num": bbox_num,
			
 
				+        }
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/fairmot.py
+++ b/paddlers/models/ppdet/modeling/architectures/fairmot.py
@@ -0,0 +1,100 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['FairMOT']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class FairMOT(BaseArch):
			
 
				+    """
			
 
				+    FairMOT network, see http://arxiv.org/abs/2004.01888
			
 
				+
			
 
				+    Args:
			
 
				+        detector (object): 'CenterNet' instance
			
 
				+        reid (object): 'FairMOTEmbeddingHead' instance
			
 
				+        tracker (object): 'JDETracker' instance
			
 
				+        loss (object): 'FairMOTLoss' instance
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['loss']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 detector='CenterNet',
			
 
				+                 reid='FairMOTEmbeddingHead',
			
 
				+                 tracker='JDETracker',
			
 
				+                 loss='FairMOTLoss'):
			
 
				+        super(FairMOT, self).__init__()
			
 
				+        self.detector = detector
			
 
				+        self.reid = reid
			
 
				+        self.tracker = tracker
			
 
				+        self.loss = loss
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        detector = create(cfg['detector'])
			
 
				+        detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape
			
 
				+
			
 
				+        kwargs = {'input_shape': detector_out_shape}
			
 
				+        reid = create(cfg['reid'], **kwargs)
			
 
				+        loss = create(cfg['loss'])
			
 
				+        tracker = create(cfg['tracker'])
			
 
				+
			
 
				+        return {
			
 
				+            'detector': detector,
			
 
				+            'reid': reid,
			
 
				+            'loss': loss,
			
 
				+            'tracker': tracker
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        loss = dict()
			
 
				+        # det_outs keys:
			
 
				+        # train: neck_feat, det_loss, heatmap_loss, size_loss, offset_loss (optional: iou_loss)
			
 
				+        # eval/infer: neck_feat, bbox, bbox_inds
			
 
				+        det_outs = self.detector(self.inputs)
			
 
				+        neck_feat = det_outs['neck_feat']
			
 
				+        if self.training:
			
 
				+            reid_loss = self.reid(neck_feat, self.inputs)
			
 
				+
			
 
				+            det_loss = det_outs['det_loss']
			
 
				+            loss = self.loss(det_loss, reid_loss)
			
 
				+            for k, v in det_outs.items():
			
 
				+                if 'loss' not in k:
			
 
				+                    continue
			
 
				+                loss.update({k: v})
			
 
				+            loss.update({'reid_loss': reid_loss})
			
 
				+            return loss
			
 
				+        else:
			
 
				+            pred_dets, pred_embs = self.reid(
			
 
				+                neck_feat, self.inputs, det_outs['bbox'],
			
 
				+                det_outs['bbox_inds'], det_outs['topk_clses'])
			
 
				+            return pred_dets, pred_embs
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        output = self._forward()
			
 
				+        return output
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        loss = self._forward()
			
 
				+        return loss
			
--- a/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/faster_rcnn.py
@@ -0,0 +1,106 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['FasterRCNN']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class FasterRCNN(BaseArch):
			
 
				+    """
			
 
				+    Faster R-CNN network, see https://arxiv.org/abs/1506.01497
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        rpn_head (object): `RPNHead` instance
			
 
				+        bbox_head (object): `BBoxHead` instance
			
 
				+        bbox_post_process (object): `BBoxPostProcess` instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+    """
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['bbox_post_process']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 rpn_head,
			
 
				+                 bbox_head,
			
 
				+                 bbox_post_process,
			
 
				+                 neck=None):
			
 
				+        super(FasterRCNN, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.rpn_head = rpn_head
			
 
				+        self.bbox_head = bbox_head
			
 
				+        self.bbox_post_process = bbox_post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        rpn_head = create(cfg['rpn_head'], **kwargs)
			
 
				+        bbox_head = create(cfg['bbox_head'], **kwargs)
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "rpn_head": rpn_head,
			
 
				+            "bbox_head": bbox_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        if self.neck is not None:
			
 
				+            body_feats = self.neck(body_feats)
			
 
				+        if self.training:
			
 
				+            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
			
 
				+            bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
			
 
				+                                          self.inputs)
			
 
				+            return rpn_loss, bbox_loss
			
 
				+        else:
			
 
				+            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
			
 
				+            preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
			
 
				+
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
			
 
				+                                                    im_shape, scale_factor)
			
 
				+
			
 
				+            # rescale the prediction back to origin image
			
 
				+            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				+                                                        im_shape, scale_factor)
			
 
				+            return bbox_pred, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        rpn_loss, bbox_loss = self._forward()
			
 
				+        loss = {}
			
 
				+        loss.update(rpn_loss)
			
 
				+        loss.update(bbox_loss)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/fcos.py
+++ b/paddlers/models/ppdet/modeling/architectures/fcos.py
@@ -0,0 +1,105 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['FCOS']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class FCOS(BaseArch):
			
 
				+    """
			
 
				+    FCOS network, see https://arxiv.org/abs/1904.01355
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+        fcos_head (object): 'FCOSHead' instance
			
 
				+        post_process (object): 'FCOSPostProcess' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['fcos_post_process']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 neck,
			
 
				+                 fcos_head='FCOSHead',
			
 
				+                 fcos_post_process='FCOSPostProcess'):
			
 
				+        super(FCOS, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.fcos_head = fcos_head
			
 
				+        self.fcos_post_process = fcos_post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        fcos_head = create(cfg['fcos_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "fcos_head": fcos_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        fpn_feats = self.neck(body_feats)
			
 
				+        fcos_head_outs = self.fcos_head(fpn_feats, self.training)
			
 
				+        if not self.training:
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            bboxes = self.fcos_post_process(fcos_head_outs, scale_factor)
			
 
				+            return bboxes
			
 
				+        else:
			
 
				+            return fcos_head_outs
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = {}
			
 
				+        tag_labels, tag_bboxes, tag_centerness = [], [], []
			
 
				+        for i in range(len(self.fcos_head.fpn_stride)):
			
 
				+            # labels, reg_target, centerness
			
 
				+            k_lbl = 'labels{}'.format(i)
			
 
				+            if k_lbl in self.inputs:
			
 
				+                tag_labels.append(self.inputs[k_lbl])
			
 
				+            k_box = 'reg_target{}'.format(i)
			
 
				+            if k_box in self.inputs:
			
 
				+                tag_bboxes.append(self.inputs[k_box])
			
 
				+            k_ctn = 'centerness{}'.format(i)
			
 
				+            if k_ctn in self.inputs:
			
 
				+                tag_centerness.append(self.inputs[k_ctn])
			
 
				+
			
 
				+        fcos_head_outs = self._forward()
			
 
				+        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
			
 
				+                                            tag_bboxes, tag_centerness)
			
 
				+        loss.update(loss_fcos)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/gfl.py
+++ b/paddlers/models/ppdet/modeling/architectures/gfl.py
@@ -0,0 +1,87 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['GFL']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class GFL(BaseArch):
			
 
				+    """
			
 
				+    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+        head (object): 'GFLHead' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, backbone, neck, head='GFLHead'):
			
 
				+        super(GFL, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "head": head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        fpn_feats = self.neck(body_feats)
			
 
				+        head_outs = self.head(fpn_feats)
			
 
				+        if not self.training:
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
			
 
				+                                                      scale_factor)
			
 
				+            return bboxes, bbox_num
			
 
				+        else:
			
 
				+            return head_outs
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = {}
			
 
				+
			
 
				+        head_outs = self._forward()
			
 
				+        loss_gfl = self.head.get_loss(head_outs, self.inputs)
			
 
				+        loss.update(loss_gfl)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/jde.py
+++ b/paddlers/models/ppdet/modeling/architectures/jde.py
@@ -0,0 +1,111 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['JDE']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class JDE(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __shared__ = ['metric']
			
 
				+    """
			
 
				+    JDE network, see https://arxiv.org/abs/1909.12605v1
			
 
				+
			
 
				+    Args:
			
 
				+        detector (object): detector model instance
			
 
				+        reid (object): reid model instance
			
 
				+        tracker (object): tracker instance
			
 
				+        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
			
 
				+            for ReID embedding evaluation, or 'MOT' for multi object tracking
			
 
				+            evaluation.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 detector='YOLOv3',
			
 
				+                 reid='JDEEmbeddingHead',
			
 
				+                 tracker='JDETracker',
			
 
				+                 metric='MOT'):
			
 
				+        super(JDE, self).__init__()
			
 
				+        self.detector = detector
			
 
				+        self.reid = reid
			
 
				+        self.tracker = tracker
			
 
				+        self.metric = metric
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        detector = create(cfg['detector'])
			
 
				+        kwargs = {'input_shape': detector.neck.out_shape}
			
 
				+
			
 
				+        reid = create(cfg['reid'], **kwargs)
			
 
				+
			
 
				+        tracker = create(cfg['tracker'])
			
 
				+
			
 
				+        return {
			
 
				+            "detector": detector,
			
 
				+            "reid": reid,
			
 
				+            "tracker": tracker,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        det_outs = self.detector(self.inputs)
			
 
				+
			
 
				+        if self.training:
			
 
				+            emb_feats = det_outs['emb_feats']
			
 
				+            loss_confs = det_outs['det_losses']['loss_confs']
			
 
				+            loss_boxes = det_outs['det_losses']['loss_boxes']
			
 
				+            jde_losses = self.reid(
			
 
				+                emb_feats,
			
 
				+                self.inputs,
			
 
				+                loss_confs=loss_confs,
			
 
				+                loss_boxes=loss_boxes)
			
 
				+            return jde_losses
			
 
				+        else:
			
 
				+            if self.metric == 'MOTDet':
			
 
				+                det_results = {
			
 
				+                    'bbox': det_outs['bbox'],
			
 
				+                    'bbox_num': det_outs['bbox_num'],
			
 
				+                }
			
 
				+                return det_results
			
 
				+
			
 
				+            elif self.metric == 'MOT':
			
 
				+                emb_feats = det_outs['emb_feats']
			
 
				+                bboxes = det_outs['bbox']
			
 
				+                boxes_idx = det_outs['boxes_idx']
			
 
				+                nms_keep_idx = det_outs['nms_keep_idx']
			
 
				+
			
 
				+                pred_dets, pred_embs = self.reid(
			
 
				+                    emb_feats,
			
 
				+                    self.inputs,
			
 
				+                    bboxes=bboxes,
			
 
				+                    boxes_idx=boxes_idx,
			
 
				+                    nms_keep_idx=nms_keep_idx)
			
 
				+                return pred_dets, pred_embs
			
 
				+
			
 
				+            else:
			
 
				+                raise ValueError(
			
 
				+                    "Unknown metric {} for multi object tracking.".format(
			
 
				+                        self.metric))
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrhrnet.py
@@ -0,0 +1,287 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from scipy.optimize import linear_sum_assignment
			
 
				+from collections import abc, defaultdict
			
 
				+import numpy as np
			
 
				+import paddle
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create, serializable
			
 
				+from .meta_arch import BaseArch
			
 
				+from .. import layers as L
			
 
				+from ..keypoint_utils import transpred
			
 
				+
			
 
				+__all__ = ['HigherHRNet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class HigherHRNet(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone='HRNet',
			
 
				+                 hrhrnet_head='HrHRNetHead',
			
 
				+                 post_process='HrHRNetPostProcess',
			
 
				+                 eval_flip=True,
			
 
				+                 flip_perm=None,
			
 
				+                 max_num_people=30):
			
 
				+        """
			
 
				+        HigherHRNet network, see https://arxiv.org/abs/1908.10357；
			
 
				+        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175
			
 
				+
			
 
				+        Args:
			
 
				+            backbone (nn.Layer): backbone instance
			
 
				+            hrhrnet_head (nn.Layer): keypoint_head instance
			
 
				+            bbox_post_process (object): `BBoxPostProcess` instance
			
 
				+        """
			
 
				+        super(HigherHRNet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.hrhrnet_head = hrhrnet_head
			
 
				+        self.post_process = post_process
			
 
				+        self.flip = eval_flip
			
 
				+        self.flip_perm = paddle.to_tensor(flip_perm)
			
 
				+        self.deploy = False
			
 
				+        self.interpolate = L.Upsample(2, mode='bilinear')
			
 
				+        self.pool = L.MaxPool(5, 1, 2)
			
 
				+        self.max_num_people = max_num_people
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        # head
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
			
 
				+        post_process = create(cfg['post_process'])
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            "hrhrnet_head": hrhrnet_head,
			
 
				+            "post_process": post_process,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        if self.flip and not self.training and not self.deploy:
			
 
				+            self.inputs['image'] = paddle.concat(
			
 
				+                (self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+
			
 
				+        if self.training:
			
 
				+            return self.hrhrnet_head(body_feats, self.inputs)
			
 
				+        else:
			
 
				+            outputs = self.hrhrnet_head(body_feats)
			
 
				+
			
 
				+            if self.flip and not self.deploy:
			
 
				+                outputs = [paddle.split(o, 2) for o in outputs]
			
 
				+                output_rflip = [
			
 
				+                    paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
			
 
				+                    for o in outputs
			
 
				+                ]
			
 
				+                output1 = [o[0] for o in outputs]
			
 
				+                heatmap = (output1[0] + output_rflip[0]) / 2.
			
 
				+                tagmaps = [output1[1], output_rflip[1]]
			
 
				+                outputs = [heatmap] + tagmaps
			
 
				+            outputs = self.get_topk(outputs)
			
 
				+
			
 
				+            if self.deploy:
			
 
				+                return outputs
			
 
				+
			
 
				+            res_lst = []
			
 
				+            h = self.inputs['im_shape'][0, 0].numpy().item()
			
 
				+            w = self.inputs['im_shape'][0, 1].numpy().item()
			
 
				+            kpts, scores = self.post_process(*outputs, h, w)
			
 
				+            res_lst.append([kpts, scores])
			
 
				+            return res_lst
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        outputs = {}
			
 
				+        res_lst = self._forward()
			
 
				+        outputs['keypoint'] = res_lst
			
 
				+        return outputs
			
 
				+
			
 
				+    def get_topk(self, outputs):
			
 
				+        # resize to image size
			
 
				+        outputs = [self.interpolate(x) for x in outputs]
			
 
				+        if len(outputs) == 3:
			
 
				+            tagmap = paddle.concat(
			
 
				+                (outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
			
 
				+        else:
			
 
				+            tagmap = outputs[1].unsqueeze(4)
			
 
				+
			
 
				+        heatmap = outputs[0]
			
 
				+        N, J = 1, self.hrhrnet_head.num_joints
			
 
				+        heatmap_maxpool = self.pool(heatmap)
			
 
				+        # topk
			
 
				+        maxmap = heatmap * (heatmap == heatmap_maxpool)
			
 
				+        maxmap = maxmap.reshape([N, J, -1])
			
 
				+        heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)
			
 
				+
			
 
				+        outputs = [heatmap, tagmap, heat_k, inds_k]
			
 
				+        return outputs
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class HrHRNetPostProcess(object):
			
 
				+    '''
			
 
				+    HrHRNet postprocess contain:
			
 
				+        1) get topk keypoints in the output heatmap
			
 
				+        2) sample the tagmap's value corresponding to each of the topk coordinate
			
 
				+        3) match different joints to combine to some people with Hungary algorithm
			
 
				+        4) adjust the coordinate by +-0.25 to decrease error std
			
 
				+        5) salvage missing joints by check positivity of heatmap - tagdiff_norm
			
 
				+    Args:
			
 
				+        max_num_people (int): max number of people support in postprocess
			
 
				+        heat_thresh (float): value of topk below this threshhold will be ignored
			
 
				+        tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
			
 
				+
			
 
				+        inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
			
 
				+        original_height, original_width (float): the original image size
			
 
				+    '''
			
 
				+
			
 
				+    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
			
 
				+        self.max_num_people = max_num_people
			
 
				+        self.heat_thresh = heat_thresh
			
 
				+        self.tag_thresh = tag_thresh
			
 
				+
			
 
				+    def lerp(self, j, y, x, heatmap):
			
 
				+        H, W = heatmap.shape[-2:]
			
 
				+        left = np.clip(x - 1, 0, W - 1)
			
 
				+        right = np.clip(x + 1, 0, W - 1)
			
 
				+        up = np.clip(y - 1, 0, H - 1)
			
 
				+        down = np.clip(y + 1, 0, H - 1)
			
 
				+        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
			
 
				+                            -0.25)
			
 
				+        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
			
 
				+                            -0.25)
			
 
				+        return offset_y + 0.5, offset_x + 0.5
			
 
				+
			
 
				+    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
			
 
				+                 original_width):
			
 
				+
			
 
				+        N, J, H, W = heatmap.shape
			
 
				+        assert N == 1, "only support batch size 1"
			
 
				+        heatmap = heatmap[0].cpu().detach().numpy()
			
 
				+        tagmap = tagmap[0].cpu().detach().numpy()
			
 
				+        heats = heat_k[0].cpu().detach().numpy()
			
 
				+        inds_np = inds_k[0].cpu().detach().numpy()
			
 
				+        y = inds_np // W
			
 
				+        x = inds_np % W
			
 
				+        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people), y.
			
 
				+                      flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
			
 
				+        coords = np.stack((y, x), axis=2)
			
 
				+        # threshold
			
 
				+        mask = heats > self.heat_thresh
			
 
				+        # cluster
			
 
				+        cluster = defaultdict(lambda: {
			
 
				+            'coords': np.zeros((J, 2), dtype=np.float32),
			
 
				+            'scores': np.zeros(J, dtype=np.float32),
			
 
				+            'tags': []
			
 
				+        })
			
 
				+        for jid, m in enumerate(mask):
			
 
				+            num_valid = m.sum()
			
 
				+            if num_valid == 0:
			
 
				+                continue
			
 
				+            valid_inds = np.where(m)[0]
			
 
				+            valid_tags = tags[jid, m, :]
			
 
				+            if len(cluster) == 0:  # initialize
			
 
				+                for i in valid_inds:
			
 
				+                    tag = tags[jid, i]
			
 
				+                    key = tag[0]
			
 
				+                    cluster[key]['tags'].append(tag)
			
 
				+                    cluster[key]['scores'][jid] = heats[jid, i]
			
 
				+                    cluster[key]['coords'][jid] = coords[jid, i]
			
 
				+                continue
			
 
				+            candidates = list(cluster.keys())[:self.max_num_people]
			
 
				+            centroids = [
			
 
				+                np.mean(
			
 
				+                    cluster[k]['tags'], axis=0) for k in candidates
			
 
				+            ]
			
 
				+            num_clusters = len(centroids)
			
 
				+            # shape is (num_valid, num_clusters, tag_dim)
			
 
				+            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
			
 
				+            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
			
 
				+            # modulate dist with heat value, see `use_detection_val`
			
 
				+            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
			
 
				+            # pad the cost matrix, otherwise new pose are ignored
			
 
				+            if num_valid > num_clusters:
			
 
				+                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
			
 
				+                              'constant',
			
 
				+                              constant_values=((0, 0), (0, 1e-10)))
			
 
				+            rows, cols = linear_sum_assignment(cost)
			
 
				+            for y, x in zip(rows, cols):
			
 
				+                tag = tags[jid, y]
			
 
				+                if y < num_valid and x < num_clusters and \
			
 
				+                   l2_dist[y, x] < self.tag_thresh:
			
 
				+                    key = candidates[x]  # merge to cluster
			
 
				+                else:
			
 
				+                    key = tag[0]  # initialize new cluster
			
 
				+                cluster[key]['tags'].append(tag)
			
 
				+                cluster[key]['scores'][jid] = heats[jid, y]
			
 
				+                cluster[key]['coords'][jid] = coords[jid, y]
			
 
				+
			
 
				+        # shape is [k, J, 2] and [k, J]
			
 
				+        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
			
 
				+        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
			
 
				+        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
			
 
				+        valid = pose_scores > 0
			
 
				+
			
 
				+        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
			
 
				+        if valid.sum() == 0:
			
 
				+            return pose_kpts, pose_kpts
			
 
				+
			
 
				+        # refine coords
			
 
				+        valid_coords = pose_coords[valid].astype(np.int32)
			
 
				+        y = valid_coords[..., 0].flatten()
			
 
				+        x = valid_coords[..., 1].flatten()
			
 
				+        _, j = np.nonzero(valid)
			
 
				+        offsets = self.lerp(j, y, x, heatmap)
			
 
				+        pose_coords[valid, 0] += offsets[0]
			
 
				+        pose_coords[valid, 1] += offsets[1]
			
 
				+
			
 
				+        # mean score before salvage
			
 
				+        mean_score = pose_scores.mean(axis=1)
			
 
				+        pose_kpts[valid, 2] = pose_scores[valid]
			
 
				+
			
 
				+        # salvage missing joints
			
 
				+        if True:
			
 
				+            for pid, coords in enumerate(pose_coords):
			
 
				+                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
			
 
				+                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
			
 
				+                score = heatmap - np.round(norm)  # (J, H, W)
			
 
				+                flat_score = score.reshape(J, -1)
			
 
				+                max_inds = np.argmax(flat_score, axis=1)
			
 
				+                max_scores = np.max(flat_score, axis=1)
			
 
				+                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
			
 
				+                if salvage_joints.sum() == 0:
			
 
				+                    continue
			
 
				+                y = max_inds[salvage_joints] // W
			
 
				+                x = max_inds[salvage_joints] % W
			
 
				+                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
			
 
				+                y = y.astype(np.float32) + offsets[0]
			
 
				+                x = x.astype(np.float32) + offsets[1]
			
 
				+                pose_coords[pid][salvage_joints, 0] = y
			
 
				+                pose_coords[pid][salvage_joints, 1] = x
			
 
				+                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
			
 
				+        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
			
 
				+                                       original_height, original_width,
			
 
				+                                       min(H, W))
			
 
				+        return pose_kpts, mean_score
			
--- a/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -0,0 +1,267 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import numpy as np
			
 
				+import math
			
 
				+import cv2
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+from ..keypoint_utils import transform_preds
			
 
				+from .. import layers as L
			
 
				+
			
 
				+__all__ = ['TopDownHRNet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TopDownHRNet(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['loss']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 width,
			
 
				+                 num_joints,
			
 
				+                 backbone='HRNet',
			
 
				+                 loss='KeyPointMSELoss',
			
 
				+                 post_process='HRNetPostProcess',
			
 
				+                 flip_perm=None,
			
 
				+                 flip=True,
			
 
				+                 shift_heatmap=True,
			
 
				+                 use_dark=True):
			
 
				+        """
			
 
				+        HRNet network, see https://arxiv.org/abs/1902.09212
			
 
				+
			
 
				+        Args:
			
 
				+            backbone (nn.Layer): backbone instance
			
 
				+            post_process (object): `HRNetPostProcess` instance
			
 
				+            flip_perm (list): The left-right joints exchange order list
			
 
				+            use_dark(bool): Whether to use DARK in post processing
			
 
				+        """
			
 
				+        super(TopDownHRNet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.post_process = HRNetPostProcess(use_dark)
			
 
				+        self.loss = loss
			
 
				+        self.flip_perm = flip_perm
			
 
				+        self.flip = flip
			
 
				+        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
			
 
				+        self.shift_heatmap = shift_heatmap
			
 
				+        self.deploy = False
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        return {'backbone': backbone, }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        feats = self.backbone(self.inputs)
			
 
				+        hrnet_outputs = self.final_conv(feats[0])
			
 
				+
			
 
				+        if self.training:
			
 
				+            return self.loss(hrnet_outputs, self.inputs)
			
 
				+        elif self.deploy:
			
 
				+            outshape = hrnet_outputs.shape
			
 
				+            max_idx = paddle.argmax(
			
 
				+                hrnet_outputs.reshape(
			
 
				+                    (outshape[0], outshape[1], outshape[2] * outshape[3])),
			
 
				+                axis=-1)
			
 
				+            return hrnet_outputs, max_idx
			
 
				+        else:
			
 
				+            if self.flip:
			
 
				+                self.inputs['image'] = self.inputs['image'].flip([3])
			
 
				+                feats = self.backbone(self.inputs)
			
 
				+                output_flipped = self.final_conv(feats[0])
			
 
				+                output_flipped = self.flip_back(output_flipped.numpy(),
			
 
				+                                                self.flip_perm)
			
 
				+                output_flipped = paddle.to_tensor(output_flipped.copy())
			
 
				+                if self.shift_heatmap:
			
 
				+                    output_flipped[:, :, :, 1:] = output_flipped.clone(
			
 
				+                    )[:, :, :, 0:-1]
			
 
				+                hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
			
 
				+            imshape = (self.inputs['im_shape'].numpy()
			
 
				+                       )[:, ::-1] if 'im_shape' in self.inputs else None
			
 
				+            center = self.inputs['center'].numpy(
			
 
				+            ) if 'center' in self.inputs else np.round(imshape / 2.)
			
 
				+            scale = self.inputs['scale'].numpy(
			
 
				+            ) if 'scale' in self.inputs else imshape / 200.
			
 
				+            outputs = self.post_process(hrnet_outputs, center, scale)
			
 
				+            return outputs
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        res_lst = self._forward()
			
 
				+        outputs = {'keypoint': res_lst}
			
 
				+        return outputs
			
 
				+
			
 
				+    def flip_back(self, output_flipped, matched_parts):
			
 
				+        assert output_flipped.ndim == 4,\
			
 
				+                'output_flipped should be [batch_size, num_joints, height, width]'
			
 
				+
			
 
				+        output_flipped = output_flipped[:, :, :, ::-1]
			
 
				+
			
 
				+        for pair in matched_parts:
			
 
				+            tmp = output_flipped[:, pair[0], :, :].copy()
			
 
				+            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
			
 
				+            output_flipped[:, pair[1], :, :] = tmp
			
 
				+
			
 
				+        return output_flipped
			
 
				+
			
 
				+
			
 
				+class HRNetPostProcess(object):
			
 
				+    def __init__(self, use_dark=True):
			
 
				+        self.use_dark = use_dark
			
 
				+
			
 
				+    def get_max_preds(self, heatmaps):
			
 
				+        '''get predictions from score maps
			
 
				+
			
 
				+        Args:
			
 
				+            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
			
 
				+
			
 
				+        Returns:
			
 
				+            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
			
 
				+            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
			
 
				+        '''
			
 
				+        assert isinstance(heatmaps,
			
 
				+                          np.ndarray), 'heatmaps should be numpy.ndarray'
			
 
				+        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
			
 
				+
			
 
				+        batch_size = heatmaps.shape[0]
			
 
				+        num_joints = heatmaps.shape[1]
			
 
				+        width = heatmaps.shape[3]
			
 
				+        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
			
 
				+        idx = np.argmax(heatmaps_reshaped, 2)
			
 
				+        maxvals = np.amax(heatmaps_reshaped, 2)
			
 
				+
			
 
				+        maxvals = maxvals.reshape((batch_size, num_joints, 1))
			
 
				+        idx = idx.reshape((batch_size, num_joints, 1))
			
 
				+
			
 
				+        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
			
 
				+
			
 
				+        preds[:, :, 0] = (preds[:, :, 0]) % width
			
 
				+        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
			
 
				+
			
 
				+        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
			
 
				+        pred_mask = pred_mask.astype(np.float32)
			
 
				+
			
 
				+        preds *= pred_mask
			
 
				+
			
 
				+        return preds, maxvals
			
 
				+
			
 
				+    def gaussian_blur(self, heatmap, kernel):
			
 
				+        border = (kernel - 1) // 2
			
 
				+        batch_size = heatmap.shape[0]
			
 
				+        num_joints = heatmap.shape[1]
			
 
				+        height = heatmap.shape[2]
			
 
				+        width = heatmap.shape[3]
			
 
				+        for i in range(batch_size):
			
 
				+            for j in range(num_joints):
			
 
				+                origin_max = np.max(heatmap[i, j])
			
 
				+                dr = np.zeros((height + 2 * border, width + 2 * border))
			
 
				+                dr[border:-border, border:-border] = heatmap[i, j].copy()
			
 
				+                dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
			
 
				+                heatmap[i, j] = dr[border:-border, border:-border].copy()
			
 
				+                heatmap[i, j] *= origin_max / np.max(heatmap[i, j])
			
 
				+        return heatmap
			
 
				+
			
 
				+    def dark_parse(self, hm, coord):
			
 
				+        heatmap_height = hm.shape[0]
			
 
				+        heatmap_width = hm.shape[1]
			
 
				+        px = int(coord[0])
			
 
				+        py = int(coord[1])
			
 
				+        if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
			
 
				+            dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
			
 
				+            dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
			
 
				+            dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
			
 
				+            dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \
			
 
				+                + hm[py-1][px-1])
			
 
				+            dyy = 0.25 * (
			
 
				+                hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px])
			
 
				+            derivative = np.matrix([[dx], [dy]])
			
 
				+            hessian = np.matrix([[dxx, dxy], [dxy, dyy]])
			
 
				+            if dxx * dyy - dxy**2 != 0:
			
 
				+                hessianinv = hessian.I
			
 
				+                offset = -hessianinv * derivative
			
 
				+                offset = np.squeeze(np.array(offset.T), axis=0)
			
 
				+                coord += offset
			
 
				+        return coord
			
 
				+
			
 
				+    def dark_postprocess(self, hm, coords, kernelsize):
			
 
				+        '''DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
			
 
				+        Representation for Human Pose Estimation (CVPR 2020).
			
 
				+        '''
			
 
				+
			
 
				+        hm = self.gaussian_blur(hm, kernelsize)
			
 
				+        hm = np.maximum(hm, 1e-10)
			
 
				+        hm = np.log(hm)
			
 
				+        for n in range(coords.shape[0]):
			
 
				+            for p in range(coords.shape[1]):
			
 
				+                coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
			
 
				+        return coords
			
 
				+
			
 
				+    def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
			
 
				+        """the highest heatvalue location with a quarter offset in the
			
 
				+        direction from the highest response to the second highest response.
			
 
				+
			
 
				+        Args:
			
 
				+            heatmaps (numpy.ndarray): The predicted heatmaps
			
 
				+            center (numpy.ndarray): The boxes center
			
 
				+            scale (numpy.ndarray): The scale factor
			
 
				+
			
 
				+        Returns:
			
 
				+            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
			
 
				+            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
			
 
				+        """
			
 
				+        coords, maxvals = self.get_max_preds(heatmaps)
			
 
				+
			
 
				+        heatmap_height = heatmaps.shape[2]
			
 
				+        heatmap_width = heatmaps.shape[3]
			
 
				+
			
 
				+        if self.use_dark:
			
 
				+            coords = self.dark_postprocess(heatmaps, coords, kernelsize)
			
 
				+        else:
			
 
				+            for n in range(coords.shape[0]):
			
 
				+                for p in range(coords.shape[1]):
			
 
				+                    hm = heatmaps[n][p]
			
 
				+                    px = int(math.floor(coords[n][p][0] + 0.5))
			
 
				+                    py = int(math.floor(coords[n][p][1] + 0.5))
			
 
				+                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
			
 
				+                        diff = np.array([
			
 
				+                            hm[py][px + 1] - hm[py][px - 1],
			
 
				+                            hm[py + 1][px] - hm[py - 1][px]
			
 
				+                        ])
			
 
				+                        coords[n][p] += np.sign(diff) * .25
			
 
				+        preds = coords.copy()
			
 
				+
			
 
				+        # Transform back
			
 
				+        for i in range(coords.shape[0]):
			
 
				+            preds[i] = transform_preds(coords[i], center[i], scale[i],
			
 
				+                                       [heatmap_width, heatmap_height])
			
 
				+
			
 
				+        return preds, maxvals
			
 
				+
			
 
				+    def __call__(self, output, center, scale):
			
 
				+        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
			
 
				+        outputs = [[
			
 
				+            np.concatenate(
			
 
				+                (preds, maxvals), axis=-1), np.mean(
			
 
				+                    maxvals, axis=1)
			
 
				+        ]]
			
 
				+        return outputs
			
--- a/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/mask_rcnn.py
@@ -0,0 +1,135 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['MaskRCNN']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class MaskRCNN(BaseArch):
			
 
				+    """
			
 
				+    Mask R-CNN network, see https://arxiv.org/abs/1703.06870
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        rpn_head (object): `RPNHead` instance
			
 
				+        bbox_head (object): `BBoxHead` instance
			
 
				+        mask_head (object): `MaskHead` instance
			
 
				+        bbox_post_process (object): `BBoxPostProcess` instance
			
 
				+        mask_post_process (object): `MaskPostProcess` instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = [
			
 
				+        'bbox_post_process',
			
 
				+        'mask_post_process',
			
 
				+    ]
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 rpn_head,
			
 
				+                 bbox_head,
			
 
				+                 mask_head,
			
 
				+                 bbox_post_process,
			
 
				+                 mask_post_process,
			
 
				+                 neck=None):
			
 
				+        super(MaskRCNN, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.rpn_head = rpn_head
			
 
				+        self.bbox_head = bbox_head
			
 
				+        self.mask_head = mask_head
			
 
				+
			
 
				+        self.bbox_post_process = bbox_post_process
			
 
				+        self.mask_post_process = mask_post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        rpn_head = create(cfg['rpn_head'], **kwargs)
			
 
				+        bbox_head = create(cfg['bbox_head'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and out_shape or bbox_head.get_head().out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        mask_head = create(cfg['mask_head'], **kwargs)
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "rpn_head": rpn_head,
			
 
				+            "bbox_head": bbox_head,
			
 
				+            "mask_head": mask_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        if self.neck is not None:
			
 
				+            body_feats = self.neck(body_feats)
			
 
				+
			
 
				+        if self.training:
			
 
				+            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
			
 
				+            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
			
 
				+                                                  self.inputs)
			
 
				+            rois, rois_num = self.bbox_head.get_assigned_rois()
			
 
				+            bbox_targets = self.bbox_head.get_assigned_targets()
			
 
				+            # Mask Head needs bbox_feat in Mask RCNN
			
 
				+            mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
			
 
				+                                       bbox_targets, bbox_feat)
			
 
				+            return rpn_loss, bbox_loss, mask_loss
			
 
				+        else:
			
 
				+            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
			
 
				+            preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)
			
 
				+
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+
			
 
				+            bbox, bbox_num = self.bbox_post_process(preds, (rois, rois_num),
			
 
				+                                                    im_shape, scale_factor)
			
 
				+            mask_out = self.mask_head(
			
 
				+                body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
			
 
				+
			
 
				+            # rescale the prediction back to origin image
			
 
				+            bbox_pred = self.bbox_post_process.get_pred(bbox, bbox_num,
			
 
				+                                                        im_shape, scale_factor)
			
 
				+            origin_shape = self.bbox_post_process.get_origin_shape()
			
 
				+            mask_pred = self.mask_post_process(mask_out[:, 0, :, :], bbox_pred,
			
 
				+                                               bbox_num, origin_shape)
			
 
				+            return bbox_pred, bbox_num, mask_pred
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        bbox_loss, mask_loss, rpn_loss = self._forward()
			
 
				+        loss = {}
			
 
				+        loss.update(rpn_loss)
			
 
				+        loss.update(bbox_loss)
			
 
				+        loss.update(mask_loss)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num, mask_pred = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/meta_arch.py
+++ b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
@@ -0,0 +1,141 @@
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import numpy as np
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import typing
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from paddlers.models.ppdet.modeling.post_process import nms
			
 
				+
			
 
				+__all__ = ['BaseArch']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class BaseArch(nn.Layer):
			
 
				+    def __init__(self, data_format='NCHW'):
			
 
				+        super(BaseArch, self).__init__()
			
 
				+        self.data_format = data_format
			
 
				+        self.inputs = {}
			
 
				+        self.fuse_norm = False
			
 
				+
			
 
				+    def load_meanstd(self, cfg_transform):
			
 
				+        self.scale = 1.
			
 
				+        self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
			
 
				+            (1, 3, 1, 1))
			
 
				+        self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape(
			
 
				+            (1, 3, 1, 1))
			
 
				+        for item in cfg_transform:
			
 
				+            if 'NormalizeImage' in item:
			
 
				+                self.mean = paddle.to_tensor(item['NormalizeImage'][
			
 
				+                    'mean']).reshape((1, 3, 1, 1))
			
 
				+                self.std = paddle.to_tensor(item['NormalizeImage'][
			
 
				+                    'std']).reshape((1, 3, 1, 1))
			
 
				+                if item['NormalizeImage'].get('is_scale', True):
			
 
				+                    self.scale = 1. / 255.
			
 
				+                break
			
 
				+        if self.data_format == 'NHWC':
			
 
				+            self.mean = self.mean.reshape(1, 1, 1, 3)
			
 
				+            self.std = self.std.reshape(1, 1, 1, 3)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        if self.data_format == 'NHWC':
			
 
				+            image = inputs['image']
			
 
				+            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
			
 
				+
			
 
				+        if self.fuse_norm:
			
 
				+            image = inputs['image']
			
 
				+            self.inputs['image'] = (image * self.scale - self.mean) / self.std
			
 
				+            self.inputs['im_shape'] = inputs['im_shape']
			
 
				+            self.inputs['scale_factor'] = inputs['scale_factor']
			
 
				+        else:
			
 
				+            self.inputs = inputs
			
 
				+
			
 
				+        self.model_arch()
			
 
				+
			
 
				+        if self.training:
			
 
				+            out = self.get_loss()
			
 
				+        else:
			
 
				+            inputs_list = []
			
 
				+            # multi-scale input
			
 
				+            if not isinstance(inputs, typing.Sequence):
			
 
				+                inputs_list.append(inputs)
			
 
				+            else:
			
 
				+                inputs_list.extend(inputs)
			
 
				+
			
 
				+            outs = []
			
 
				+            for inp in inputs_list:
			
 
				+                self.inputs = inp
			
 
				+                outs.append(self.get_pred())
			
 
				+
			
 
				+            # multi-scale test
			
 
				+            if len(outs) > 1:
			
 
				+                out = self.merge_multi_scale_predictions(outs)
			
 
				+            else:
			
 
				+                out = outs[0]
			
 
				+        return out
			
 
				+
			
 
				+    def merge_multi_scale_predictions(self, outs):
			
 
				+        # default values for architectures not included in following list
			
 
				+        num_classes = 80
			
 
				+        nms_threshold = 0.5
			
 
				+        keep_top_k = 100
			
 
				+
			
 
				+        if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'
			
 
				+                                       ):
			
 
				+            num_classes = self.bbox_head.num_classes
			
 
				+            keep_top_k = self.bbox_post_process.nms.keep_top_k
			
 
				+            nms_threshold = self.bbox_post_process.nms.nms_threshold
			
 
				+        else:
			
 
				+            raise Exception(
			
 
				+                "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
			
 
				+            )
			
 
				+
			
 
				+        final_boxes = []
			
 
				+        all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
			
 
				+        for c in range(num_classes):
			
 
				+            idxs = all_scale_outs[:, 0] == c
			
 
				+            if np.count_nonzero(idxs) == 0:
			
 
				+                continue
			
 
				+            r = nms(all_scale_outs[idxs, 1:], nms_threshold)
			
 
				+            final_boxes.append(
			
 
				+                np.concatenate([np.full((r.shape[0], 1), c), r], 1))
			
 
				+        out = np.concatenate(final_boxes)
			
 
				+        out = np.concatenate(sorted(
			
 
				+            out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
			
 
				+        out = {
			
 
				+            'bbox': paddle.to_tensor(out),
			
 
				+            'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
			
 
				+        }
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+    def build_inputs(self, data, input_def):
			
 
				+        inputs = {}
			
 
				+        for i, k in enumerate(input_def):
			
 
				+            inputs[k] = data[i]
			
 
				+        return inputs
			
 
				+
			
 
				+    def model_arch(self, ):
			
 
				+        pass
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        raise NotImplementedError("Should implement get_loss method!")
			
 
				+
			
 
				+    def get_pred(self, ):
			
 
				+        raise NotImplementedError("Should implement get_pred method!")
			
 
				+
			
 
				+    @classmethod
			
 
				+    def convert_sync_batchnorm(cls, layer):
			
 
				+        layer_output = layer
			
 
				+        if getattr(layer, 'norm_type', None) == 'sync_bn':
			
 
				+            layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
			
 
				+        else:
			
 
				+            for name, sublayer in layer.named_children():
			
 
				+                layer_output.add_sublayer(name,
			
 
				+                                          cls.convert_sync_batchnorm(sublayer))
			
 
				+
			
 
				+        del layer
			
 
				+        return layer_output
			
--- a/paddlers/models/ppdet/modeling/architectures/picodet.py
+++ b/paddlers/models/ppdet/modeling/architectures/picodet.py
@@ -0,0 +1,91 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['PicoDet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class PicoDet(BaseArch):
			
 
				+    """
			
 
				+    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        neck (object): 'FPN' instance
			
 
				+        head (object): 'PicoHead' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, backbone, neck, head='PicoHead'):
			
 
				+        super(PicoDet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+        self.deploy = False
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "head": head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        fpn_feats = self.neck(body_feats)
			
 
				+        head_outs = self.head(fpn_feats, self.deploy)
			
 
				+        if self.training or self.deploy:
			
 
				+            return head_outs, None
			
 
				+        else:
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
			
 
				+                                                      scale_factor)
			
 
				+            return bboxes, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = {}
			
 
				+
			
 
				+        head_outs, _ = self._forward()
			
 
				+        loss_gfl = self.head.get_loss(head_outs, self.inputs)
			
 
				+        loss.update(loss_gfl)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        if self.deploy:
			
 
				+            return {'picodet': self._forward()[0]}
			
 
				+        else:
			
 
				+            bbox_pred, bbox_num = self._forward()
			
 
				+            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+            return output
			
--- a/paddlers/models/ppdet/modeling/architectures/s2anet.py
+++ b/paddlers/models/ppdet/modeling/architectures/s2anet.py
@@ -0,0 +1,102 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['S2ANet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class S2ANet(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = [
			
 
				+        's2anet_head',
			
 
				+        's2anet_bbox_post_process',
			
 
				+    ]
			
 
				+
			
 
				+    def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process):
			
 
				+        """
			
 
				+        S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
			
 
				+
			
 
				+        Args:
			
 
				+            backbone (object): backbone instance
			
 
				+            neck (object): `FPN` instance
			
 
				+            s2anet_head (object): `S2ANetHead` instance
			
 
				+            s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
			
 
				+        """
			
 
				+        super(S2ANet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.s2anet_head = s2anet_head
			
 
				+        self.s2anet_bbox_post_process = s2anet_bbox_post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        out_shape = neck and neck.out_shape or backbone.out_shape
			
 
				+        kwargs = {'input_shape': out_shape}
			
 
				+        s2anet_head = create(cfg['s2anet_head'], **kwargs)
			
 
				+        s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
			
 
				+                                          **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "s2anet_head": s2anet_head,
			
 
				+            "s2anet_bbox_post_process": s2anet_bbox_post_process,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        if self.neck is not None:
			
 
				+            body_feats = self.neck(body_feats)
			
 
				+        self.s2anet_head(body_feats)
			
 
				+        if self.training:
			
 
				+            loss = self.s2anet_head.get_loss(self.inputs)
			
 
				+            total_loss = paddle.add_n(list(loss.values()))
			
 
				+            loss.update({'loss': total_loss})
			
 
				+            return loss
			
 
				+        else:
			
 
				+            im_shape = self.inputs['im_shape']
			
 
				+            scale_factor = self.inputs['scale_factor']
			
 
				+            nms_pre = self.s2anet_bbox_post_process.nms_pre
			
 
				+            pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
			
 
				+
			
 
				+            # post_process
			
 
				+            pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores,
			
 
				+                                                                  pred_bboxes)
			
 
				+            # rescale the prediction back to origin image
			
 
				+            pred_bboxes = self.s2anet_bbox_post_process.get_pred(
			
 
				+                pred_bboxes, bbox_num, im_shape, scale_factor)
			
 
				+
			
 
				+            # output
			
 
				+            output = {'bbox': pred_bboxes, 'bbox_num': bbox_num}
			
 
				+            return output
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = self._forward()
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        output = self._forward()
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/solov2.py
+++ b/paddlers/models/ppdet/modeling/architectures/solov2.py
@@ -0,0 +1,110 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['SOLOv2']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class SOLOv2(BaseArch):
			
 
				+    """
			
 
				+    SOLOv2 network, see https://arxiv.org/abs/2003.10152
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): an backbone instance
			
 
				+        solov2_head (object): an `SOLOv2Head` instance
			
 
				+        mask_head (object): an `SOLOv2MaskHead` instance
			
 
				+        neck (object): neck of network, such as feature pyramid network instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, backbone, solov2_head, mask_head, neck=None):
			
 
				+        super(SOLOv2, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.solov2_head = solov2_head
			
 
				+        self.mask_head = mask_head
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        solov2_head = create(cfg['solov2_head'], **kwargs)
			
 
				+        mask_head = create(cfg['mask_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            'solov2_head': solov2_head,
			
 
				+            'mask_head': mask_head,
			
 
				+        }
			
 
				+
			
 
				+    def model_arch(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+
			
 
				+        body_feats = self.neck(body_feats)
			
 
				+
			
 
				+        self.seg_pred = self.mask_head(body_feats)
			
 
				+
			
 
				+        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(
			
 
				+            body_feats)
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = {}
			
 
				+        # get gt_ins_labels, gt_cate_labels, etc.
			
 
				+        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
			
 
				+        fg_num = self.inputs['fg_num']
			
 
				+        for i in range(len(self.solov2_head.seg_num_grids)):
			
 
				+            ins_label = 'ins_label{}'.format(i)
			
 
				+            if ins_label in self.inputs:
			
 
				+                gt_ins_labels.append(self.inputs[ins_label])
			
 
				+            cate_label = 'cate_label{}'.format(i)
			
 
				+            if cate_label in self.inputs:
			
 
				+                gt_cate_labels.append(self.inputs[cate_label])
			
 
				+            grid_order = 'grid_order{}'.format(i)
			
 
				+            if grid_order in self.inputs:
			
 
				+                gt_grid_orders.append(self.inputs[grid_order])
			
 
				+
			
 
				+        loss_solov2 = self.solov2_head.get_loss(
			
 
				+            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
			
 
				+            gt_ins_labels, gt_cate_labels, gt_grid_orders, fg_num)
			
 
				+        loss.update(loss_solov2)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        seg_masks, cate_labels, cate_scores, bbox_num = self.solov2_head.get_prediction(
			
 
				+            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
			
 
				+            self.inputs['im_shape'], self.inputs['scale_factor'])
			
 
				+        outs = {
			
 
				+            "segm": seg_masks,
			
 
				+            "bbox_num": bbox_num,
			
 
				+            'cate_label': cate_labels,
			
 
				+            'cate_score': cate_scores
			
 
				+        }
			
 
				+        return outs
			
--- a/paddlers/models/ppdet/modeling/architectures/sparse_rcnn.py
+++ b/paddlers/models/ppdet/modeling/architectures/sparse_rcnn.py
@@ -0,0 +1,99 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ["SparseRCNN"]
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class SparseRCNN(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ["postprocess"]
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone,
			
 
				+                 neck,
			
 
				+                 head="SparsercnnHead",
			
 
				+                 postprocess="SparsePostProcess"):
			
 
				+        super(SparseRCNN, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+        self.postprocess = postprocess
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'roi_input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "head": head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        fpn_feats = self.neck(body_feats)
			
 
				+        head_outs = self.head(fpn_feats, self.inputs["img_whwh"])
			
 
				+
			
 
				+        if not self.training:
			
 
				+            bboxes = self.postprocess(
			
 
				+                head_outs["pred_logits"], head_outs["pred_boxes"],
			
 
				+                self.inputs["scale_factor_wh"], self.inputs["img_whwh"])
			
 
				+            return bboxes
			
 
				+        else:
			
 
				+            return head_outs
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        batch_gt_class = self.inputs["gt_class"]
			
 
				+        batch_gt_box = self.inputs["gt_bbox"]
			
 
				+        batch_whwh = self.inputs["img_whwh"]
			
 
				+        targets = []
			
 
				+
			
 
				+        for i in range(len(batch_gt_class)):
			
 
				+            boxes = batch_gt_box[i]
			
 
				+            labels = batch_gt_class[i].squeeze(-1)
			
 
				+            img_whwh = batch_whwh[i]
			
 
				+            img_whwh_tgt = img_whwh.unsqueeze(0).tile([int(boxes.shape[0]), 1])
			
 
				+            targets.append({
			
 
				+                "boxes": boxes,
			
 
				+                "labels": labels,
			
 
				+                "img_whwh": img_whwh,
			
 
				+                "img_whwh_tgt": img_whwh_tgt
			
 
				+            })
			
 
				+
			
 
				+        outputs = self._forward()
			
 
				+        loss_dict = self.head.get_loss(outputs, targets)
			
 
				+        acc = loss_dict["acc"]
			
 
				+        loss_dict.pop("acc")
			
 
				+        total_loss = sum(loss_dict.values())
			
 
				+        loss_dict.update({"loss": total_loss, "acc": acc})
			
 
				+        return loss_dict
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/ssd.py
+++ b/paddlers/models/ppdet/modeling/architectures/ssd.py
@@ -0,0 +1,93 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['SSD']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class SSD(BaseArch):
			
 
				+    """
			
 
				+    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (nn.Layer): backbone instance
			
 
				+        ssd_head (nn.Layer): `SSDHead` instance
			
 
				+        post_process (object): `BBoxPostProcess` instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['post_process']
			
 
				+
			
 
				+    def __init__(self, backbone, ssd_head, post_process, r34_backbone=False):
			
 
				+        super(SSD, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.ssd_head = ssd_head
			
 
				+        self.post_process = post_process
			
 
				+        self.r34_backbone = r34_backbone
			
 
				+        if self.r34_backbone:
			
 
				+            from paddlers.models.ppdet.modeling.backbones.resnet import ResNet
			
 
				+            assert isinstance(self.backbone, ResNet) and \
			
 
				+                   self.backbone.depth == 34, \
			
 
				+                "If you set r34_backbone=True, please use ResNet-34 as backbone."
			
 
				+            self.backbone.res_layers[2].blocks[
			
 
				+                0].branch2a.conv._stride = [1, 1]
			
 
				+            self.backbone.res_layers[2].blocks[0].short.conv._stride = [1, 1]
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        # head
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        ssd_head = create(cfg['ssd_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            "ssd_head": ssd_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        # Backbone
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+
			
 
				+        # SSD Head
			
 
				+        if self.training:
			
 
				+            return self.ssd_head(body_feats, self.inputs['image'],
			
 
				+                                 self.inputs['gt_bbox'],
			
 
				+                                 self.inputs['gt_class'])
			
 
				+        else:
			
 
				+            preds, anchors = self.ssd_head(body_feats, self.inputs['image'])
			
 
				+            bbox, bbox_num = self.post_process(preds, anchors,
			
 
				+                                               self.inputs['im_shape'],
			
 
				+                                               self.inputs['scale_factor'])
			
 
				+            return bbox, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        return {"loss": self._forward()}
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {
			
 
				+            "bbox": bbox_pred,
			
 
				+            "bbox_num": bbox_num,
			
 
				+        }
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/tood.py
+++ b/paddlers/models/ppdet/modeling/architectures/tood.py
@@ -0,0 +1,78 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['TOOD']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TOOD(BaseArch):
			
 
				+    """
			
 
				+    TOOD: Task-aligned One-stage Object Detection, see https://arxiv.org/abs/2108.07755
			
 
				+    Args:
			
 
				+        backbone (nn.Layer): backbone instance
			
 
				+        neck (nn.Layer): 'FPN' instance
			
 
				+        head (nn.Layer): 'TOODHead' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+
			
 
				+    def __init__(self, backbone, neck, head):
			
 
				+        super(TOOD, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.head = head
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        head = create(cfg['head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "head": head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        fpn_feats = self.neck(body_feats)
			
 
				+        head_outs = self.head(fpn_feats)
			
 
				+        if not self.training:
			
 
				+            bboxes, bbox_num = self.head.post_process(
			
 
				+                head_outs, self.inputs['im_shape'],
			
 
				+                self.inputs['scale_factor'])
			
 
				+            return bboxes, bbox_num
			
 
				+        else:
			
 
				+            loss = self.head.get_loss(head_outs, self.inputs)
			
 
				+            return loss
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/ttfnet.py
+++ b/paddlers/models/ppdet/modeling/architectures/ttfnet.py
@@ -0,0 +1,98 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+
			
 
				+__all__ = ['TTFNet']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TTFNet(BaseArch):
			
 
				+    """
			
 
				+    TTFNet network, see https://arxiv.org/abs/1909.00700
			
 
				+
			
 
				+    Args:
			
 
				+        backbone (object): backbone instance
			
 
				+        neck (object): 'TTFFPN' instance
			
 
				+        ttf_head (object): 'TTFHead' instance
			
 
				+        post_process (object): 'BBoxPostProcess' instance
			
 
				+    """
			
 
				+
			
 
				+    __category__ = 'architecture'
			
 
				+    __inject__ = ['post_process']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone='DarkNet',
			
 
				+                 neck='TTFFPN',
			
 
				+                 ttf_head='TTFHead',
			
 
				+                 post_process='BBoxPostProcess'):
			
 
				+        super(TTFNet, self).__init__()
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.ttf_head = ttf_head
			
 
				+        self.post_process = post_process
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        ttf_head = create(cfg['ttf_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "ttf_head": ttf_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        body_feats = self.neck(body_feats)
			
 
				+        hm, wh = self.ttf_head(body_feats)
			
 
				+        if self.training:
			
 
				+            return hm, wh
			
 
				+        else:
			
 
				+            bbox, bbox_num = self.post_process(hm, wh, self.inputs['im_shape'],
			
 
				+                                               self.inputs['scale_factor'])
			
 
				+            return bbox, bbox_num
			
 
				+
			
 
				+    def get_loss(self, ):
			
 
				+        loss = {}
			
 
				+        heatmap = self.inputs['ttf_heatmap']
			
 
				+        box_target = self.inputs['ttf_box_target']
			
 
				+        reg_weight = self.inputs['ttf_reg_weight']
			
 
				+        hm, wh = self._forward()
			
 
				+        head_loss = self.ttf_head.get_loss(hm, wh, heatmap, box_target,
			
 
				+                                           reg_weight)
			
 
				+        loss.update(head_loss)
			
 
				+        total_loss = paddle.add_n(list(loss.values()))
			
 
				+        loss.update({'loss': total_loss})
			
 
				+        return loss
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        bbox_pred, bbox_num = self._forward()
			
 
				+        output = {
			
 
				+            "bbox": bbox_pred,
			
 
				+            "bbox_num": bbox_num,
			
 
				+        }
			
 
				+        return output
			
--- a/paddlers/models/ppdet/modeling/architectures/yolo.py
+++ b/paddlers/models/ppdet/modeling/architectures/yolo.py
@@ -0,0 +1,124 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, create
			
 
				+from .meta_arch import BaseArch
			
 
				+from ..post_process import JDEBBoxPostProcess
			
 
				+
			
 
				+__all__ = ['YOLOv3']
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class YOLOv3(BaseArch):
			
 
				+    __category__ = 'architecture'
			
 
				+    __shared__ = ['data_format']
			
 
				+    __inject__ = ['post_process']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 backbone='DarkNet',
			
 
				+                 neck='YOLOv3FPN',
			
 
				+                 yolo_head='YOLOv3Head',
			
 
				+                 post_process='BBoxPostProcess',
			
 
				+                 data_format='NCHW',
			
 
				+                 for_mot=False):
			
 
				+        """
			
 
				+        YOLOv3 network, see https://arxiv.org/abs/1804.02767
			
 
				+
			
 
				+        Args:
			
 
				+            backbone (nn.Layer): backbone instance
			
 
				+            neck (nn.Layer): neck instance
			
 
				+            yolo_head (nn.Layer): anchor_head instance
			
 
				+            bbox_post_process (object): `BBoxPostProcess` instance
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+            for_mot (bool): whether return other features for multi-object tracking
			
 
				+                models, default False in pure object detection models.
			
 
				+        """
			
 
				+        super(YOLOv3, self).__init__(data_format=data_format)
			
 
				+        self.backbone = backbone
			
 
				+        self.neck = neck
			
 
				+        self.yolo_head = yolo_head
			
 
				+        self.post_process = post_process
			
 
				+        self.for_mot = for_mot
			
 
				+        self.return_idx = isinstance(post_process, JDEBBoxPostProcess)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, cfg, *args, **kwargs):
			
 
				+        # backbone
			
 
				+        backbone = create(cfg['backbone'])
			
 
				+
			
 
				+        # fpn
			
 
				+        kwargs = {'input_shape': backbone.out_shape}
			
 
				+        neck = create(cfg['neck'], **kwargs)
			
 
				+
			
 
				+        # head
			
 
				+        kwargs = {'input_shape': neck.out_shape}
			
 
				+        yolo_head = create(cfg['yolo_head'], **kwargs)
			
 
				+
			
 
				+        return {
			
 
				+            'backbone': backbone,
			
 
				+            'neck': neck,
			
 
				+            "yolo_head": yolo_head,
			
 
				+        }
			
 
				+
			
 
				+    def _forward(self):
			
 
				+        body_feats = self.backbone(self.inputs)
			
 
				+        neck_feats = self.neck(body_feats, self.for_mot)
			
 
				+
			
 
				+        if isinstance(neck_feats, dict):
			
 
				+            assert self.for_mot == True
			
 
				+            emb_feats = neck_feats['emb_feats']
			
 
				+            neck_feats = neck_feats['yolo_feats']
			
 
				+
			
 
				+        if self.training:
			
 
				+            yolo_losses = self.yolo_head(neck_feats, self.inputs)
			
 
				+
			
 
				+            if self.for_mot:
			
 
				+                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
			
 
				+            else:
			
 
				+                return yolo_losses
			
 
				+
			
 
				+        else:
			
 
				+            yolo_head_outs = self.yolo_head(neck_feats)
			
 
				+
			
 
				+            if self.for_mot:
			
 
				+                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
			
 
				+                    yolo_head_outs, self.yolo_head.mask_anchors)
			
 
				+                output = {
			
 
				+                    'bbox': bbox,
			
 
				+                    'bbox_num': bbox_num,
			
 
				+                    'boxes_idx': boxes_idx,
			
 
				+                    'nms_keep_idx': nms_keep_idx,
			
 
				+                    'emb_feats': emb_feats,
			
 
				+                }
			
 
				+            else:
			
 
				+                if self.return_idx:
			
 
				+                    _, bbox, bbox_num, _ = self.post_process(
			
 
				+                        yolo_head_outs, self.yolo_head.mask_anchors)
			
 
				+                else:
			
 
				+                    bbox, bbox_num = self.post_process(
			
 
				+                        yolo_head_outs, self.yolo_head.mask_anchors,
			
 
				+                        self.inputs['im_shape'], self.inputs['scale_factor'])
			
 
				+                output = {'bbox': bbox, 'bbox_num': bbox_num}
			
 
				+
			
 
				+            return output
			
 
				+
			
 
				+    def get_loss(self):
			
 
				+        return self._forward()
			
 
				+
			
 
				+    def get_pred(self):
			
 
				+        return self._forward()
			
--- a/paddlers/models/ppdet/modeling/assigners/__init__.py
+++ b/paddlers/models/ppdet/modeling/assigners/__init__.py
@@ -0,0 +1,23 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import utils
			
 
				+from . import task_aligned_assigner
			
 
				+from . import atss_assigner
			
 
				+from . import simota_assigner
			
 
				+
			
 
				+from .utils import *
			
 
				+from .task_aligned_assigner import *
			
 
				+from .atss_assigner import *
			
 
				+from .simota_assigner import *
			
--- a/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/atss_assigner.py
@@ -0,0 +1,211 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import numpy as np
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from ..ops import iou_similarity
			
 
				+from ..bbox_utils import bbox_center
			
 
				+from .utils import (pad_gt, check_points_inside_bboxes, compute_max_iou_anchor,
			
 
				+                    compute_max_iou_gt)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class ATSSAssigner(nn.Layer):
			
 
				+    """Bridging the Gap Between Anchor-based and Anchor-free Detection
			
 
				+     via Adaptive Training Sample Selection
			
 
				+    """
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 topk=9,
			
 
				+                 num_classes=80,
			
 
				+                 force_gt_matching=False,
			
 
				+                 eps=1e-9):
			
 
				+        super(ATSSAssigner, self).__init__()
			
 
				+        self.topk = topk
			
 
				+        self.num_classes = num_classes
			
 
				+        self.force_gt_matching = force_gt_matching
			
 
				+        self.eps = eps
			
 
				+
			
 
				+    def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
			
 
				+                             pad_gt_mask):
			
 
				+        pad_gt_mask = pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool)
			
 
				+        gt2anchor_distances_list = paddle.split(
			
 
				+            gt2anchor_distances, num_anchors_list, axis=-1)
			
 
				+        num_anchors_index = np.cumsum(num_anchors_list).tolist()
			
 
				+        num_anchors_index = [0, ] + num_anchors_index[:-1]
			
 
				+        is_in_topk_list = []
			
 
				+        topk_idxs_list = []
			
 
				+        for distances, anchors_index in zip(gt2anchor_distances_list,
			
 
				+                                            num_anchors_index):
			
 
				+            num_anchors = distances.shape[-1]
			
 
				+            topk_metrics, topk_idxs = paddle.topk(
			
 
				+                distances, self.topk, axis=-1, largest=False)
			
 
				+            topk_idxs_list.append(topk_idxs + anchors_index)
			
 
				+            topk_idxs = paddle.where(pad_gt_mask, topk_idxs,
			
 
				+                                     paddle.zeros_like(topk_idxs))
			
 
				+            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
			
 
				+            is_in_topk = paddle.where(is_in_topk > 1,
			
 
				+                                      paddle.zeros_like(is_in_topk),
			
 
				+                                      is_in_topk)
			
 
				+            is_in_topk_list.append(
			
 
				+                is_in_topk.astype(gt2anchor_distances.dtype))
			
 
				+        is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
			
 
				+        topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
			
 
				+        return is_in_topk_list, topk_idxs_list
			
 
				+
			
 
				+    @paddle.no_grad()
			
 
				+    def forward(self,
			
 
				+                anchor_bboxes,
			
 
				+                num_anchors_list,
			
 
				+                gt_labels,
			
 
				+                gt_bboxes,
			
 
				+                bg_index,
			
 
				+                gt_scores=None):
			
 
				+        r"""This code is based on
			
 
				+            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
			
 
				+
			
 
				+        The assignment is done in following steps
			
 
				+        1. compute iou between all bbox (bbox of all pyramid levels) and gt
			
 
				+        2. compute center distance between all bbox and gt
			
 
				+        3. on each pyramid level, for each gt, select k bbox whose center
			
 
				+           are closest to the gt center, so we total select k*l bbox as
			
 
				+           candidates for each gt
			
 
				+        4. get corresponding iou for the these candidates, and compute the
			
 
				+           mean and std, set mean + std as the iou threshold
			
 
				+        5. select these candidates whose iou are greater than or equal to
			
 
				+           the threshold as positive
			
 
				+        6. limit the positive sample's center in gt
			
 
				+        7. if an anchor box is assigned to multiple gts, the one with the
			
 
				+           highest iou will be selected.
			
 
				+        Args:
			
 
				+            anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
			
 
				+                    "xmin, xmax, ymin, ymax" format
			
 
				+            num_anchors_list (List): num of anchors in each level
			
 
				+            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
			
 
				+            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            bg_index (int): background index
			
 
				+            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
			
 
				+                    shape(B, n, 1), if None, then it will initialize with one_hot label
			
 
				+        Returns:
			
 
				+            assigned_labels (Tensor): (B, L)
			
 
				+            assigned_bboxes (Tensor): (B, L, 4)
			
 
				+            assigned_scores (Tensor): (B, L, C)
			
 
				+        """
			
 
				+        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
			
 
				+            gt_labels, gt_bboxes, gt_scores)
			
 
				+        assert gt_labels.ndim == gt_bboxes.ndim and \
			
 
				+               gt_bboxes.ndim == 3
			
 
				+
			
 
				+        num_anchors, _ = anchor_bboxes.shape
			
 
				+        batch_size, num_max_boxes, _ = gt_bboxes.shape
			
 
				+
			
 
				+        # negative batch
			
 
				+        if num_max_boxes == 0:
			
 
				+            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
			
 
				+            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
			
 
				+            assigned_scores = paddle.zeros(
			
 
				+                [batch_size, num_anchors, self.num_classes])
			
 
				+            return assigned_labels, assigned_bboxes, assigned_scores
			
 
				+
			
 
				+        # 1. compute iou between gt and anchor bbox, [B, n, L]
			
 
				+        ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
			
 
				+        ious = ious.reshape([batch_size, -1, num_anchors])
			
 
				+
			
 
				+        # 2. compute center distance between all anchors and gt, [B, n, L]
			
 
				+        gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
			
 
				+        anchor_centers = bbox_center(anchor_bboxes)
			
 
				+        gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
			
 
				+            .norm(2, axis=-1).reshape([batch_size, -1, num_anchors])
			
 
				+
			
 
				+        # 3. on each pyramid level, selecting topk closest candidates
			
 
				+        # based on the center distance, [B, n, L]
			
 
				+        is_in_topk, topk_idxs = self._gather_topk_pyramid(
			
 
				+            gt2anchor_distances, num_anchors_list, pad_gt_mask)
			
 
				+
			
 
				+        # 4. get corresponding iou for the these candidates, and compute the
			
 
				+        # mean and std, 5. set mean + std as the iou threshold
			
 
				+        iou_candidates = ious * is_in_topk
			
 
				+        iou_threshold = paddle.index_sample(
			
 
				+            iou_candidates.flatten(stop_axis=-2),
			
 
				+            topk_idxs.flatten(stop_axis=-2))
			
 
				+        iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
			
 
				+        iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
			
 
				+                        iou_threshold.std(axis=-1, keepdim=True)
			
 
				+        is_in_topk = paddle.where(
			
 
				+            iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
			
 
				+            is_in_topk, paddle.zeros_like(is_in_topk))
			
 
				+
			
 
				+        # 6. check the positive sample's center in gt, [B, n, L]
			
 
				+        is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
			
 
				+
			
 
				+        # select positive sample, [B, n, L]
			
 
				+        mask_positive = is_in_topk * is_in_gts * pad_gt_mask
			
 
				+
			
 
				+        # 7. if an anchor box is assigned to multiple gts,
			
 
				+        # the one with the highest iou will be selected.
			
 
				+        mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				+        if mask_positive_sum.max() > 1:
			
 
				+            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
			
 
				+                [1, num_max_boxes, 1])
			
 
				+            is_max_iou = compute_max_iou_anchor(ious)
			
 
				+            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
			
 
				+                                         mask_positive)
			
 
				+            mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				+        # 8. make sure every gt_bbox matches the anchor
			
 
				+        if self.force_gt_matching:
			
 
				+            is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
			
 
				+            mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
			
 
				+                [1, num_max_boxes, 1])
			
 
				+            mask_positive = paddle.where(mask_max_iou, is_max_iou,
			
 
				+                                         mask_positive)
			
 
				+            mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				+        assigned_gt_index = mask_positive.argmax(axis=-2)
			
 
				+        assert mask_positive_sum.max() == 1, \
			
 
				+            ("one anchor just assign one gt, but received not equals 1. "
			
 
				+             "Received: %f" % mask_positive_sum.max().item())
			
 
				+
			
 
				+        # assigned target
			
 
				+        batch_ind = paddle.arange(
			
 
				+            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
			
 
				+        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
			
 
				+        assigned_labels = paddle.gather(
			
 
				+            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
			
 
				+        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
			
 
				+        assigned_labels = paddle.where(
			
 
				+            mask_positive_sum > 0, assigned_labels,
			
 
				+            paddle.full_like(assigned_labels, bg_index))
			
 
				+
			
 
				+        assigned_bboxes = paddle.gather(
			
 
				+            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
			
 
				+        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
			
 
				+
			
 
				+        assigned_scores = F.one_hot(assigned_labels, self.num_classes)
			
 
				+        if gt_scores is not None:
			
 
				+            gather_scores = paddle.gather(
			
 
				+                pad_gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
			
 
				+            gather_scores = gather_scores.reshape([batch_size, num_anchors])
			
 
				+            gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
			
 
				+                                         paddle.zeros_like(gather_scores))
			
 
				+            assigned_scores *= gather_scores.unsqueeze(-1)
			
 
				+
			
 
				+        return assigned_labels, assigned_bboxes, assigned_scores
			
--- a/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/simota_assigner.py
@@ -0,0 +1,262 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+# The code is based on:
			
 
				+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py
			
 
				+
			
 
				+import paddle
			
 
				+import numpy as np
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
				+from paddlers.models.ppdet.modeling.losses.varifocal_loss import varifocal_loss
			
 
				+from paddlers.models.ppdet.modeling.bbox_utils import batch_bbox_overlaps
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class SimOTAAssigner(object):
			
 
				+    """Computes matching between predictions and ground truth.
			
 
				+    Args:
			
 
				+        center_radius (int | float, optional): Ground truth center size
			
 
				+            to judge whether a prior is in center. Default 2.5.
			
 
				+        candidate_topk (int, optional): The candidate top-k which used to
			
 
				+            get top-k ious to calculate dynamic-k. Default 10.
			
 
				+        iou_weight (int | float, optional): The scale factor for regression
			
 
				+            iou cost. Default 3.0.
			
 
				+        cls_weight (int | float, optional): The scale factor for classification
			
 
				+            cost. Default 1.0.
			
 
				+        num_classes (int): The num_classes of dataset.
			
 
				+        use_vfl (int): Whether to use varifocal_loss when calculating the cost matrix.
			
 
				+    """
			
 
				+    __shared__ = ['num_classes']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 center_radius=2.5,
			
 
				+                 candidate_topk=10,
			
 
				+                 iou_weight=3.0,
			
 
				+                 cls_weight=1.0,
			
 
				+                 num_classes=80,
			
 
				+                 use_vfl=True):
			
 
				+        self.center_radius = center_radius
			
 
				+        self.candidate_topk = candidate_topk
			
 
				+        self.iou_weight = iou_weight
			
 
				+        self.cls_weight = cls_weight
			
 
				+        self.num_classes = num_classes
			
 
				+        self.use_vfl = use_vfl
			
 
				+
			
 
				+    def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
			
 
				+                                     gt_bboxes):
			
 
				+        num_gt = gt_bboxes.shape[0]
			
 
				+
			
 
				+        flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
			
 
				+            [1, num_gt])
			
 
				+        flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
			
 
				+            [1, num_gt])
			
 
				+        flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
			
 
				+            [1, num_gt])
			
 
				+        flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
			
 
				+            [1, num_gt])
			
 
				+
			
 
				+        # is prior centers in gt bboxes, shape: [n_center, n_gt]
			
 
				+        l_ = flatten_x - gt_bboxes[:, 0]
			
 
				+        t_ = flatten_y - gt_bboxes[:, 1]
			
 
				+        r_ = gt_bboxes[:, 2] - flatten_x
			
 
				+        b_ = gt_bboxes[:, 3] - flatten_y
			
 
				+
			
 
				+        deltas = paddle.stack([l_, t_, r_, b_], axis=1)
			
 
				+        is_in_gts = deltas.min(axis=1) > 0
			
 
				+        is_in_gts_all = is_in_gts.sum(axis=1) > 0
			
 
				+
			
 
				+        # is prior centers in gt centers
			
 
				+        gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
			
 
				+        gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
			
 
				+        ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
			
 
				+        ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
			
 
				+        ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
			
 
				+        ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y
			
 
				+
			
 
				+        cl_ = flatten_x - ct_bound_l
			
 
				+        ct_ = flatten_y - ct_bound_t
			
 
				+        cr_ = ct_bound_r - flatten_x
			
 
				+        cb_ = ct_bound_b - flatten_y
			
 
				+
			
 
				+        ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
			
 
				+        is_in_cts = ct_deltas.min(axis=1) > 0
			
 
				+        is_in_cts_all = is_in_cts.sum(axis=1) > 0
			
 
				+
			
 
				+        # in any of gts or gt centers, shape: [n_center]
			
 
				+        is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
			
 
				+                                                     is_in_cts_all)
			
 
				+
			
 
				+        is_in_gts_or_centers_all_inds = paddle.nonzero(
			
 
				+            is_in_gts_or_centers_all).squeeze(1)
			
 
				+
			
 
				+        # both in gts and gt centers, shape: [num_fg, num_gt]
			
 
				+        is_in_gts_and_centers = paddle.logical_and(
			
 
				+            paddle.gather(
			
 
				+                is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
			
 
				+                axis=0).cast('bool'),
			
 
				+            paddle.gather(
			
 
				+                is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
			
 
				+                axis=0).cast('bool'))
			
 
				+        return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
			
 
				+
			
 
				+    def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
			
 
				+        match_matrix = np.zeros_like(cost_matrix.numpy())
			
 
				+        # select candidate topk ious for dynamic-k calculation
			
 
				+        topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
			
 
				+        # calculate dynamic k for each gt
			
 
				+        dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
			
 
				+        for gt_idx in range(num_gt):
			
 
				+            _, pos_idx = paddle.topk(
			
 
				+                cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
			
 
				+            match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0
			
 
				+
			
 
				+        del topk_ious, dynamic_ks, pos_idx
			
 
				+
			
 
				+        # match points more than two gts
			
 
				+        extra_match_gts_mask = match_matrix.sum(1) > 1
			
 
				+        if extra_match_gts_mask.sum() > 0:
			
 
				+            cost_matrix = cost_matrix.numpy()
			
 
				+            cost_argmin = np.argmin(
			
 
				+                cost_matrix[extra_match_gts_mask, :], axis=1)
			
 
				+            match_matrix[extra_match_gts_mask, :] *= 0.0
			
 
				+            match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
			
 
				+        # get foreground mask
			
 
				+        match_fg_mask_inmatrix = match_matrix.sum(1) > 0
			
 
				+        match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)
			
 
				+
			
 
				+        return match_gt_inds_to_fg, match_fg_mask_inmatrix
			
 
				+
			
 
				+    def get_sample(self, assign_gt_inds, gt_bboxes):
			
 
				+        pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
			
 
				+        neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
			
 
				+        pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
			
 
				+
			
 
				+        if gt_bboxes.size == 0:
			
 
				+            # hack for index error case
			
 
				+            assert pos_assigned_gt_inds.size == 0
			
 
				+            pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
			
 
				+        else:
			
 
				+            if len(gt_bboxes.shape) < 2:
			
 
				+                gt_bboxes = gt_bboxes.resize(-1, 4)
			
 
				+            pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
			
 
				+        return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
			
 
				+
			
 
				+    def __call__(self,
			
 
				+                 flatten_cls_pred_scores,
			
 
				+                 flatten_center_and_stride,
			
 
				+                 flatten_bboxes,
			
 
				+                 gt_bboxes,
			
 
				+                 gt_labels,
			
 
				+                 eps=1e-7):
			
 
				+        """Assign gt to priors using SimOTA.
			
 
				+        TODO: add comment.
			
 
				+        Returns:
			
 
				+            assign_result: The assigned result.
			
 
				+        """
			
 
				+        num_gt = gt_bboxes.shape[0]
			
 
				+        num_bboxes = flatten_bboxes.shape[0]
			
 
				+
			
 
				+        if num_gt == 0 or num_bboxes == 0:
			
 
				+            # No ground truth or boxes
			
 
				+            label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
			
 
				+            label_weight = np.ones([num_bboxes], dtype=np.float32)
			
 
				+            bbox_target = np.zeros_like(flatten_center_and_stride)
			
 
				+            return 0, label, label_weight, bbox_target
			
 
				+
			
 
				+        is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
			
 
				+            flatten_center_and_stride, gt_bboxes)
			
 
				+
			
 
				+        # bboxes and scores to calculate matrix
			
 
				+        valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
			
 
				+        valid_cls_pred_scores = flatten_cls_pred_scores[
			
 
				+            is_in_gts_or_centers_all_inds]
			
 
				+        num_valid_bboxes = valid_flatten_bboxes.shape[0]
			
 
				+
			
 
				+        pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
			
 
				+                                            gt_bboxes)  # [num_points,num_gts]
			
 
				+        if self.use_vfl:
			
 
				+            gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
			
 
				+                [num_valid_bboxes, 1]).reshape([-1])
			
 
				+            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
			
 
				+                [1, num_gt, 1]).reshape([-1, self.num_classes])
			
 
				+            vfl_score = np.zeros(valid_pred_scores.shape)
			
 
				+            vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
			
 
				+            )] = pairwise_ious.reshape([-1])
			
 
				+            vfl_score = paddle.to_tensor(vfl_score)
			
 
				+            losses_vfl = varifocal_loss(
			
 
				+                valid_pred_scores, vfl_score,
			
 
				+                use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
			
 
				+            losses_giou = batch_bbox_overlaps(
			
 
				+                valid_flatten_bboxes, gt_bboxes, mode='giou')
			
 
				+            cost_matrix = (
			
 
				+                losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
			
 
				+                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
			
 
				+                100000000)
			
 
				+        else:
			
 
				+            iou_cost = -paddle.log(pairwise_ious + eps)
			
 
				+            gt_onehot_label = (F.one_hot(
			
 
				+                gt_labels.squeeze(-1).cast(paddle.int64),
			
 
				+                flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
			
 
				+                               .tile([num_valid_bboxes, 1, 1]))
			
 
				+
			
 
				+            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
			
 
				+                [1, num_gt, 1])
			
 
				+            cls_cost = F.binary_cross_entropy(
			
 
				+                valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
			
 
				+
			
 
				+            cost_matrix = (
			
 
				+                cls_cost * self.cls_weight + iou_cost * self.iou_weight +
			
 
				+                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
			
 
				+                100000000)
			
 
				+
			
 
				+        match_gt_inds_to_fg, match_fg_mask_inmatrix = \
			
 
				+            self.dynamic_k_matching(
			
 
				+                cost_matrix, pairwise_ious, num_gt)
			
 
				+
			
 
				+        # sample and assign results
			
 
				+        assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
			
 
				+        match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
			
 
				+        match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
			
 
				+        )] = match_fg_mask_inmatrix
			
 
				+
			
 
				+        assigned_gt_inds[match_fg_mask_inall.astype(
			
 
				+            np.bool)] = match_gt_inds_to_fg + 1
			
 
				+
			
 
				+        pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
			
 
				+            = self.get_sample(assigned_gt_inds, gt_bboxes.numpy())
			
 
				+
			
 
				+        bbox_target = np.zeros_like(flatten_bboxes)
			
 
				+        bbox_weight = np.zeros_like(flatten_bboxes)
			
 
				+        label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
			
 
				+        label_weight = np.zeros([num_bboxes], dtype=np.float32)
			
 
				+
			
 
				+        if len(pos_inds) > 0:
			
 
				+            gt_labels = gt_labels.numpy()
			
 
				+            pos_bbox_targets = pos_gt_bboxes
			
 
				+            bbox_target[pos_inds, :] = pos_bbox_targets
			
 
				+            bbox_weight[pos_inds, :] = 1.0
			
 
				+            if not np.any(gt_labels):
			
 
				+                label[pos_inds] = 0
			
 
				+            else:
			
 
				+                label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]
			
 
				+
			
 
				+            label_weight[pos_inds] = 1.0
			
 
				+        if len(neg_inds) > 0:
			
 
				+            label_weight[neg_inds] = 1.0
			
 
				+
			
 
				+        pos_num = max(pos_inds.size, 1)
			
 
				+
			
 
				+        return pos_num, label, label_weight, bbox_target
			
--- a/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
+++ b/paddlers/models/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -0,0 +1,158 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from ..bbox_utils import iou_similarity
			
 
				+from .utils import (pad_gt, gather_topk_anchors, check_points_inside_bboxes,
			
 
				+                    compute_max_iou_anchor)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class TaskAlignedAssigner(nn.Layer):
			
 
				+    """TOOD: Task-aligned One-stage Object Detection
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
			
 
				+        super(TaskAlignedAssigner, self).__init__()
			
 
				+        self.topk = topk
			
 
				+        self.alpha = alpha
			
 
				+        self.beta = beta
			
 
				+        self.eps = eps
			
 
				+
			
 
				+    @paddle.no_grad()
			
 
				+    def forward(self,
			
 
				+                pred_scores,
			
 
				+                pred_bboxes,
			
 
				+                anchor_points,
			
 
				+                gt_labels,
			
 
				+                gt_bboxes,
			
 
				+                bg_index,
			
 
				+                gt_scores=None):
			
 
				+        r"""This code is based on
			
 
				+            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
			
 
				+
			
 
				+        The assignment is done in following steps
			
 
				+        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
			
 
				+        2. select top-k bbox as candidates for each gt
			
 
				+        3. limit the positive sample's center in gt (because the anchor-free detector
			
 
				+           only can predict positive distance)
			
 
				+        4. if an anchor box is assigned to multiple gts, the one with the
			
 
				+           highest iou will be selected.
			
 
				+        Args:
			
 
				+            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
			
 
				+            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
			
 
				+            anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
			
 
				+            gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
			
 
				+            gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
			
 
				+            bg_index (int): background index
			
 
				+            gt_scores (Tensor|List[Tensor]|None, float32) Score of gt_bboxes,
			
 
				+                    shape(B, n, 1), if None, then it will initialize with one_hot label
			
 
				+        Returns:
			
 
				+            assigned_labels (Tensor): (B, L)
			
 
				+            assigned_bboxes (Tensor): (B, L, 4)
			
 
				+            assigned_scores (Tensor): (B, L, C)
			
 
				+        """
			
 
				+        assert pred_scores.ndim == pred_bboxes.ndim
			
 
				+
			
 
				+        gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
			
 
				+            gt_labels, gt_bboxes, gt_scores)
			
 
				+        assert gt_labels.ndim == gt_bboxes.ndim and \
			
 
				+               gt_bboxes.ndim == 3
			
 
				+
			
 
				+        batch_size, num_anchors, num_classes = pred_scores.shape
			
 
				+        _, num_max_boxes, _ = gt_bboxes.shape
			
 
				+
			
 
				+        # negative batch
			
 
				+        if num_max_boxes == 0:
			
 
				+            assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
			
 
				+            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
			
 
				+            assigned_scores = paddle.zeros(
			
 
				+                [batch_size, num_anchors, num_classes])
			
 
				+            return assigned_labels, assigned_bboxes, assigned_scores
			
 
				+
			
 
				+        # compute iou between gt and pred bbox, [B, n, L]
			
 
				+        ious = iou_similarity(gt_bboxes, pred_bboxes)
			
 
				+        # gather pred bboxes class score
			
 
				+        pred_scores = pred_scores.transpose([0, 2, 1])
			
 
				+        batch_ind = paddle.arange(
			
 
				+            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
			
 
				+        gt_labels_ind = paddle.stack(
			
 
				+            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
			
 
				+            axis=-1)
			
 
				+        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
			
 
				+        # compute alignment metrics, [B, n, L]
			
 
				+        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
			
 
				+            self.beta)
			
 
				+
			
 
				+        # check the positive sample's center in gt, [B, n, L]
			
 
				+        is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
			
 
				+
			
 
				+        # select topk largest alignment metrics pred bbox as candidates
			
 
				+        # for each gt, [B, n, L]
			
 
				+        is_in_topk = gather_topk_anchors(
			
 
				+            alignment_metrics * is_in_gts,
			
 
				+            self.topk,
			
 
				+            topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))
			
 
				+
			
 
				+        # select positive sample, [B, n, L]
			
 
				+        mask_positive = is_in_topk * is_in_gts * pad_gt_mask
			
 
				+
			
 
				+        # if an anchor box is assigned to multiple gts,
			
 
				+        # the one with the highest iou will be selected, [B, n, L]
			
 
				+        mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				+        if mask_positive_sum.max() > 1:
			
 
				+            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
			
 
				+                [1, num_max_boxes, 1])
			
 
				+            is_max_iou = compute_max_iou_anchor(ious)
			
 
				+            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
			
 
				+                                         mask_positive)
			
 
				+            mask_positive_sum = mask_positive.sum(axis=-2)
			
 
				+        assigned_gt_index = mask_positive.argmax(axis=-2)
			
 
				+        assert mask_positive_sum.max() == 1, \
			
 
				+            ("one anchor just assign one gt, but received not equals 1. "
			
 
				+             "Received: %f" % mask_positive_sum.max().item())
			
 
				+
			
 
				+        # assigned target
			
 
				+        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
			
 
				+        assigned_labels = paddle.gather(
			
 
				+            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
			
 
				+        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
			
 
				+        assigned_labels = paddle.where(
			
 
				+            mask_positive_sum > 0, assigned_labels,
			
 
				+            paddle.full_like(assigned_labels, bg_index))
			
 
				+
			
 
				+        assigned_bboxes = paddle.gather(
			
 
				+            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
			
 
				+        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
			
 
				+
			
 
				+        assigned_scores = F.one_hot(assigned_labels, num_classes)
			
 
				+        # rescale alignment metrics
			
 
				+        alignment_metrics *= mask_positive
			
 
				+        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
			
 
				+        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
			
 
				+                                                           keepdim=True)
			
 
				+        alignment_metrics = alignment_metrics / (
			
 
				+            max_metrics_per_instance + self.eps) * max_ious_per_instance
			
 
				+        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
			
 
				+        assigned_scores = assigned_scores * alignment_metrics
			
 
				+
			
 
				+        return assigned_labels, assigned_bboxes, assigned_scores
			
--- a/paddlers/models/ppdet/modeling/assigners/utils.py
+++ b/paddlers/models/ppdet/modeling/assigners/utils.py
@@ -0,0 +1,195 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
# Public helpers exported by this module for the label assigners.
__all__ = [
    'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
    'compute_max_iou_anchor', 'compute_max_iou_gt',
    'generate_anchors_for_grid_cell'
]
			
 
				+
			
 
				+
			
 
				+def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
			
 
				+    r""" Pad 0 in gt_labels and gt_bboxes.
			
 
				+    Args:
			
 
				+        gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
			
 
				+            shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
			
 
				+        gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
			
 
				+            shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
			
 
				+        gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
			
 
				+            shape is [B, n, 1] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
			
 
				+    Returns:
			
 
				+        pad_gt_labels (Tensor, int64): shape[B, n, 1]
			
 
				+        pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
			
 
				+        pad_gt_scores (Tensor, float32): shape[B, n, 1]
			
 
				+        pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
			
 
				+    """
			
 
				+    if isinstance(gt_labels, paddle.Tensor) and isinstance(gt_bboxes,
			
 
				+                                                           paddle.Tensor):
			
 
				+        assert gt_labels.ndim == gt_bboxes.ndim and \
			
 
				+               gt_bboxes.ndim == 3
			
 
				+        pad_gt_mask = (
			
 
				+            gt_bboxes.sum(axis=-1, keepdim=True) > 0).astype(gt_bboxes.dtype)
			
 
				+        if gt_scores is None:
			
 
				+            gt_scores = pad_gt_mask.clone()
			
 
				+        assert gt_labels.ndim == gt_scores.ndim
			
 
				+
			
 
				+        return gt_labels, gt_bboxes, gt_scores, pad_gt_mask
			
 
				+    elif isinstance(gt_labels, list) and isinstance(gt_bboxes, list):
			
 
				+        assert len(gt_labels) == len(gt_bboxes), \
			
 
				+            'The number of `gt_labels` and `gt_bboxes` is not equal. '
			
 
				+        num_max_boxes = max([len(a) for a in gt_bboxes])
			
 
				+        batch_size = len(gt_bboxes)
			
 
				+        # pad label and bbox
			
 
				+        pad_gt_labels = paddle.zeros(
			
 
				+            [batch_size, num_max_boxes, 1], dtype=gt_labels[0].dtype)
			
 
				+        pad_gt_bboxes = paddle.zeros(
			
 
				+            [batch_size, num_max_boxes, 4], dtype=gt_bboxes[0].dtype)
			
 
				+        pad_gt_scores = paddle.zeros(
			
 
				+            [batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
			
 
				+        pad_gt_mask = paddle.zeros(
			
 
				+            [batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
			
 
				+        for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
			
 
				+            if len(label) > 0 and len(bbox) > 0:
			
 
				+                pad_gt_labels[i, :len(label)] = label
			
 
				+                pad_gt_bboxes[i, :len(bbox)] = bbox
			
 
				+                pad_gt_mask[i, :len(bbox)] = 1.
			
 
				+                if gt_scores is not None:
			
 
				+                    pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
			
 
				+        if gt_scores is None:
			
 
				+            pad_gt_scores = pad_gt_mask.clone()
			
 
				+        return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
			
 
				+    else:
			
 
				+        raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')
			
 
				+
			
 
				+
			
 
				+def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
			
 
				+    r"""
			
 
				+    Args:
			
 
				+        metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
			
 
				+        topk (int): The number of top elements to look for along the axis.
			
 
				+        largest (bool) : largest is a flag, if set to true,
			
 
				+            algorithm will sort by descending order, otherwise sort by
			
 
				+            ascending order. Default: True
			
 
				+        topk_mask (Tensor, bool|None): shape[B, n, topk], ignore bbox mask,
			
 
				+            Default: None
			
 
				+        eps (float): Default: 1e-9
			
 
				+    Returns:
			
 
				+        is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
			
 
				+    """
			
 
				+    num_anchors = metrics.shape[-1]
			
 
				+    topk_metrics, topk_idxs = paddle.topk(
			
 
				+        metrics, topk, axis=-1, largest=largest)
			
 
				+    if topk_mask is None:
			
 
				+        topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > eps).tile(
			
 
				+            [1, 1, topk])
			
 
				+    topk_idxs = paddle.where(topk_mask, topk_idxs,
			
 
				+                             paddle.zeros_like(topk_idxs))
			
 
				+    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
			
 
				+    is_in_topk = paddle.where(is_in_topk > 1,
			
 
				+                              paddle.zeros_like(is_in_topk), is_in_topk)
			
 
				+    return is_in_topk.astype(metrics.dtype)
			
 
				+
			
 
				+
			
 
				+def check_points_inside_bboxes(points, bboxes, eps=1e-9):
			
 
				+    r"""
			
 
				+    Args:
			
 
				+        points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
			
 
				+        bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
			
 
				+        eps (float): Default: 1e-9
			
 
				+    Returns:
			
 
				+        is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
			
 
				+    """
			
 
				+    points = points.unsqueeze([0, 1])
			
 
				+    x, y = points.chunk(2, axis=-1)
			
 
				+    xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
			
 
				+    l = x - xmin
			
 
				+    t = y - ymin
			
 
				+    r = xmax - x
			
 
				+    b = ymax - y
			
 
				+    bbox_ltrb = paddle.concat([l, t, r, b], axis=-1)
			
 
				+    return (bbox_ltrb.min(axis=-1) > eps).astype(bboxes.dtype)
			
 
				+
			
 
				+
			
 
				+def compute_max_iou_anchor(ious):
			
 
				+    r"""
			
 
				+    For each anchor, find the GT with the largest IOU.
			
 
				+    Args:
			
 
				+        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
			
 
				+    Returns:
			
 
				+        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
			
 
				+    """
			
 
				+    num_max_boxes = ious.shape[-2]
			
 
				+    max_iou_index = ious.argmax(axis=-2)
			
 
				+    is_max_iou = F.one_hot(max_iou_index, num_max_boxes).transpose([0, 2, 1])
			
 
				+    return is_max_iou.astype(ious.dtype)
			
 
				+
			
 
				+
			
 
				+def compute_max_iou_gt(ious):
			
 
				+    r"""
			
 
				+    For each GT, find the anchor with the largest IOU.
			
 
				+    Args:
			
 
				+        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
			
 
				+    Returns:
			
 
				+        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
			
 
				+    """
			
 
				+    num_anchors = ious.shape[-1]
			
 
				+    max_iou_index = ious.argmax(axis=-1)
			
 
				+    is_max_iou = F.one_hot(max_iou_index, num_anchors)
			
 
				+    return is_max_iou.astype(ious.dtype)
			
 
				+
			
 
				+
			
 
				+def generate_anchors_for_grid_cell(feats,
			
 
				+                                   fpn_strides,
			
 
				+                                   grid_cell_size=5.0,
			
 
				+                                   grid_cell_offset=0.5):
			
 
				+    r"""
			
 
				+    Like ATSS, generate anchors based on grid size.
			
 
				+    Args:
			
 
				+        feats (List[Tensor]): shape[s, (b, c, h, w)]
			
 
				+        fpn_strides (tuple|list): shape[s], stride for each scale feature
			
 
				+        grid_cell_size (float): anchor size
			
 
				+        grid_cell_offset (float): The range is between 0 and 1.
			
 
				+    Returns:
			
 
				+        anchors (List[Tensor]): shape[s, (l, 4)]
			
 
				+        num_anchors_list (List[int]): shape[s]
			
 
				+        stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
			
 
				+    """
			
 
				+    assert len(feats) == len(fpn_strides)
			
 
				+    anchors = []
			
 
				+    num_anchors_list = []
			
 
				+    stride_tensor_list = []
			
 
				+    for feat, stride in zip(feats, fpn_strides):
			
 
				+        _, _, h, w = feat.shape
			
 
				+        cell_half_size = grid_cell_size * stride * 0.5
			
 
				+        shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
			
 
				+        shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
			
 
				+        shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
			
 
				+        anchor = paddle.stack(
			
 
				+            [
			
 
				+                shift_x - cell_half_size, shift_y - cell_half_size,
			
 
				+                shift_x + cell_half_size, shift_y + cell_half_size
			
 
				+            ],
			
 
				+            axis=-1).astype(feat.dtype)
			
 
				+        anchors.append(anchor.reshape([-1, 4]))
			
 
				+        num_anchors_list.append(len(anchors[-1]))
			
 
				+        stride_tensor_list.append(
			
 
				+            paddle.full([num_anchors_list[-1], 1], stride))
			
 
				+    return anchors, num_anchors_list, stride_tensor_list
			
--- a/paddlers/models/ppdet/modeling/backbones/__init__.py
+++ b/paddlers/models/ppdet/modeling/backbones/__init__.py
@@ -0,0 +1,49 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from . import vgg
			
 
				+from . import resnet
			
 
				+from . import darknet
			
 
				+from . import mobilenet_v1
			
 
				+from . import mobilenet_v3
			
 
				+from . import hrnet
			
 
				+from . import lite_hrnet
			
 
				+from . import blazenet
			
 
				+from . import ghostnet
			
 
				+from . import senet
			
 
				+from . import res2net
			
 
				+from . import dla
			
 
				+from . import shufflenet_v2
			
 
				+from . import swin_transformer
			
 
				+from . import lcnet
			
 
				+from . import hardnet
			
 
				+from . import esnet
			
 
				+
			
 
				+from .vgg import *
			
 
				+from .resnet import *
			
 
				+from .darknet import *
			
 
				+from .mobilenet_v1 import *
			
 
				+from .mobilenet_v3 import *
			
 
				+from .hrnet import *
			
 
				+from .lite_hrnet import *
			
 
				+from .blazenet import *
			
 
				+from .ghostnet import *
			
 
				+from .senet import *
			
 
				+from .res2net import *
			
 
				+from .dla import *
			
 
				+from .shufflenet_v2 import *
			
 
				+from .swin_transformer import *
			
 
				+from .lcnet import *
			
 
				+from .hardnet import *
			
 
				+from .esnet import *
			
--- a/paddlers/models/ppdet/modeling/backbones/blazenet.py
+++ b/paddlers/models/ppdet/modeling/backbones/blazenet.py
@@ -0,0 +1,320 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn.initializer import KaimingNormal
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
# Only the backbone class is exported; the helper layers stay module-private.
__all__ = ['BlazeNet']
			
 
				+
			
 
				+
			
 
				+def hard_swish(x):
			
 
				+    return x * F.relu6(x + 3) / 6.
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 kernel_size,
			
 
				+                 stride,
			
 
				+                 padding,
			
 
				+                 num_groups=1,
			
 
				+                 act='relu',
			
 
				+                 conv_lr=0.1,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 name=None):
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+        self.act = act
			
 
				+        self._conv = nn.Conv2D(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            kernel_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=num_groups,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=conv_lr, initializer=KaimingNormal()),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        if norm_type in ['bn', 'sync_bn']:
			
 
				+            self._batch_norm = nn.BatchNorm2D(out_channels)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self._conv(x)
			
 
				+        x = self._batch_norm(x)
			
 
				+        if self.act == "relu":
			
 
				+            x = F.relu(x)
			
 
				+        elif self.act == "relu6":
			
 
				+            x = F.relu6(x)
			
 
				+        elif self.act == 'leaky':
			
 
				+            x = F.leaky_relu(x)
			
 
				+        elif self.act == 'hard_swish':
			
 
				+            x = hard_swish(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
class BlazeBlock(nn.Layer):
    """Residual block built from depth-wise and point-wise ConvBNLayers.

    The main path is depth-wise conv(s) -> 1x1 conv, optionally followed by
    a second depth-wise/1x1 stage when `double_channels` is given. The
    result is added to the (optionally pooled and projected) input and
    passed through ReLU.

    Args:
        in_channels (int): channels of the block input.
        out_channels1 (int): output channels (and groups) of the first
            depth-wise stage.
        out_channels2 (int): output channels of the first 1x1 conv.
        double_channels (int|None): output channels of the second 1x1 conv;
            None disables the second stage.
        stride (int): 1 or 2. Any value other than 1 enables the
            max-pool + 1x1-conv shortcut.
        use_5x5kernel (bool): use one 5x5 depth-wise conv per stage instead
            of two stacked 3x3 depth-wise convs.
        act (str): activation of the first 1x1 conv; only applied when the
            second stage is enabled.
        name (str): prefix for the registered sublayer names.
            NOTE(review): the default is None, but the value is concatenated
            with strings below, so a string must be passed.
    """

    def __init__(self,
                 in_channels,
                 out_channels1,
                 out_channels2,
                 double_channels=None,
                 stride=1,
                 use_5x5kernel=True,
                 act='relu',
                 name=None):
        super(BlazeBlock, self).__init__()
        assert stride in [1, 2]
        self.use_pool = not stride == 1
        self.use_double_block = double_channels is not None
        # Plain Python list; the layers are registered via add_sublayer.
        self.conv_dw = []
        if use_5x5kernel:
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=5,
                        stride=stride,
                        padding=2,
                        num_groups=out_channels1,
                        name=name + "1_dw")))
        else:
            # Two stacked 3x3 convs replace the 5x5; only the second one
            # carries the block stride.
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_1",
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_1")))
            self.conv_dw.append(
                self.add_sublayer(
                    name + "1_dw_2",
                    ConvBNLayer(
                        in_channels=out_channels1,
                        out_channels=out_channels1,
                        kernel_size=3,
                        stride=stride,
                        padding=1,
                        num_groups=out_channels1,
                        name=name + "1_dw_2")))
        # The first point-wise conv is activated only when a second stage
        # follows it.
        self.act = act if self.use_double_block else None
        self.conv_pw = ConvBNLayer(
            in_channels=out_channels1,
            out_channels=out_channels2,
            kernel_size=1,
            stride=1,
            padding=0,
            act=self.act,
            name=name + "1_sep")
        if self.use_double_block:
            self.conv_dw2 = []
            if use_5x5kernel:
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=5,
                            stride=1,
                            padding=2,
                            num_groups=out_channels2,
                            name=name + "2_dw")))
            else:
                # NOTE(review): this layer is registered as "2_dw_1" but is
                # given name "1_dw_1"; the ConvBNLayer in this file does not
                # read `name`, so this looks like a harmless copy-paste slip.
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_1",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            name=name + "1_dw_1")))
                self.conv_dw2.append(
                    self.add_sublayer(
                        name + "2_dw_2",
                        ConvBNLayer(
                            in_channels=out_channels2,
                            out_channels=out_channels2,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            num_groups=out_channels2,
                            name=name + "2_dw_2")))
            self.conv_pw2 = ConvBNLayer(
                in_channels=out_channels2,
                out_channels=double_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                name=name + "2_sep")
        # shortcut
        if self.use_pool:
            # Project the input to the channel count of the main path output.
            shortcut_channel = double_channels or out_channels2
            self._shortcut = []
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_pool',
                    nn.MaxPool2D(
                        kernel_size=stride, stride=stride, ceil_mode=True)))
            self._shortcut.append(
                self.add_sublayer(
                    name + '_shortcut_conv',
                    ConvBNLayer(
                        in_channels=in_channels,
                        out_channels=shortcut_channel,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        name="shortcut" + name)))

    def forward(self, x):
        """Return ``relu(shortcut(x) + main_path(x))``."""
        y = x
        for conv_dw_block in self.conv_dw:
            y = conv_dw_block(y)
        y = self.conv_pw(y)
        if self.use_double_block:
            for conv_dw2_block in self.conv_dw2:
                y = conv_dw2_block(y)
            y = self.conv_pw2(y)
        # Without pooling the input itself is the shortcut.
        if self.use_pool:
            for shortcut in self._shortcut:
                x = shortcut(x)
        return F.relu(paddle.add(x, y))
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class BlazeNet(nn.Layer):
			
 
				+    """
			
 
				+    BlazeFace, see https://arxiv.org/abs/1907.05047
			
 
				+
			
 
				+    Args:
			
 
				+        blaze_filters (list): number of filter for each blaze block.
			
 
				+        double_blaze_filters (list): number of filter for each double_blaze block.
			
 
				+        use_5x5kernel (bool): whether or not filter size is 5x5 in depth-wise conv.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48],
			
 
				+                                [48, 48]],
			
 
				+                 double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96],
			
 
				+                                       [96, 24, 96], [96, 24, 96, 2],
			
 
				+                                       [96, 24, 96], [96, 24, 96]],
			
 
				+                 use_5x5kernel=True,
			
 
				+                 act=None):
			
 
				+        super(BlazeNet, self).__init__()
			
 
				+        conv1_num_filters = blaze_filters[0][0]
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            in_channels=3,
			
 
				+            out_channels=conv1_num_filters,
			
 
				+            kernel_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            name="conv1")
			
 
				+        in_channels = conv1_num_filters
			
 
				+        self.blaze_block = []
			
 
				+        self._out_channels = []
			
 
				+        for k, v in enumerate(blaze_filters):
			
 
				+            assert len(v) in [2, 3], \
			
 
				+                "blaze_filters {} not in [2, 3]"
			
 
				+            if len(v) == 2:
			
 
				+                self.blaze_block.append(
			
 
				+                    self.add_sublayer(
			
 
				+                        'blaze_{}'.format(k),
			
 
				+                        BlazeBlock(
			
 
				+                            in_channels,
			
 
				+                            v[0],
			
 
				+                            v[1],
			
 
				+                            use_5x5kernel=use_5x5kernel,
			
 
				+                            act=act,
			
 
				+                            name='blaze_{}'.format(k))))
			
 
				+            elif len(v) == 3:
			
 
				+                self.blaze_block.append(
			
 
				+                    self.add_sublayer(
			
 
				+                        'blaze_{}'.format(k),
			
 
				+                        BlazeBlock(
			
 
				+                            in_channels,
			
 
				+                            v[0],
			
 
				+                            v[1],
			
 
				+                            stride=v[2],
			
 
				+                            use_5x5kernel=use_5x5kernel,
			
 
				+                            act=act,
			
 
				+                            name='blaze_{}'.format(k))))
			
 
				+            in_channels = v[1]
			
 
				+
			
 
				+        for k, v in enumerate(double_blaze_filters):
			
 
				+            assert len(v) in [3, 4], \
			
 
				+                "blaze_filters {} not in [3, 4]"
			
 
				+            if len(v) == 3:
			
 
				+                self.blaze_block.append(
			
 
				+                    self.add_sublayer(
			
 
				+                        'double_blaze_{}'.format(k),
			
 
				+                        BlazeBlock(
			
 
				+                            in_channels,
			
 
				+                            v[0],
			
 
				+                            v[1],
			
 
				+                            double_channels=v[2],
			
 
				+                            use_5x5kernel=use_5x5kernel,
			
 
				+                            act=act,
			
 
				+                            name='double_blaze_{}'.format(k))))
			
 
				+            elif len(v) == 4:
			
 
				+                self.blaze_block.append(
			
 
				+                    self.add_sublayer(
			
 
				+                        'double_blaze_{}'.format(k),
			
 
				+                        BlazeBlock(
			
 
				+                            in_channels,
			
 
				+                            v[0],
			
 
				+                            v[1],
			
 
				+                            double_channels=v[2],
			
 
				+                            stride=v[3],
			
 
				+                            use_5x5kernel=use_5x5kernel,
			
 
				+                            act=act,
			
 
				+                            name='double_blaze_{}'.format(k))))
			
 
				+            in_channels = v[2]
			
 
				+            self._out_channels.append(in_channels)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outs = []
			
 
				+        y = self.conv1(inputs['image'])
			
 
				+        for block in self.blaze_block:
			
 
				+            y = block(y)
			
 
				+            outs.append(y)
			
 
				+        return [outs[-4], outs[-1]]
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [
			
 
				+            ShapeSpec(channels=c)
			
 
				+            for c in [self._out_channels[-4], self._out_channels[-1]]
			
 
				+        ]
			
--- a/paddlers/models/ppdet/modeling/backbones/darknet.py
+++ b/paddlers/models/ppdet/modeling/backbones/darknet.py
@@ -0,0 +1,340 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.modeling.ops import batch_norm, mish
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['DarkNet', 'ConvBNLayer']
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size=3,
			
 
				+                 stride=1,
			
 
				+                 groups=1,
			
 
				+                 padding=0,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 act="leaky",
			
 
				+                 freeze_norm=False,
			
 
				+                 data_format='NCHW',
			
 
				+                 name=''):
			
 
				+        """
			
 
				+        conv + bn + activation layer
			
 
				+
			
 
				+        Args:
			
 
				+            ch_in (int): input channel
			
 
				+            ch_out (int): output channel
			
 
				+            filter_size (int): filter size, default 3
			
 
				+            stride (int): stride, default 1
			
 
				+            groups (int): number of groups of conv layer, default 1
			
 
				+            padding (int): padding size, default 0
			
 
				+            norm_type (str): batch norm type, default bn
			
 
				+            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
			
 
				+            act (str): activation function type, default 'leaky', which means leaky_relu
			
 
				+            freeze_norm (bool): whether to freeze norm, default False
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+        """
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels=ch_in,
			
 
				+            out_channels=ch_out,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=groups,
			
 
				+            data_format=data_format,
			
 
				+            bias_attr=False)
			
 
				+        self.batch_norm = batch_norm(
			
 
				+            ch_out,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+        self.act = act
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        out = self.conv(inputs)
			
 
				+        out = self.batch_norm(out)
			
 
				+        if self.act == 'leaky':
			
 
				+            out = F.leaky_relu(out, 0.1)
			
 
				+        elif self.act == 'mish':
			
 
				+            out = mish(out)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class DownSample(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size=3,
			
 
				+                 stride=2,
			
 
				+                 padding=1,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 data_format='NCHW'):
			
 
				+        """
			
 
				+        downsample layer
			
 
				+
			
 
				+        Args:
			
 
				+            ch_in (int): input channel
			
 
				+            ch_out (int): output channel
			
 
				+            filter_size (int): filter size, default 3
			
 
				+            stride (int): stride, default 2
			
 
				+            padding (int): padding size, default 1
			
 
				+            norm_type (str): batch norm type, default bn
			
 
				+            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
			
 
				+            freeze_norm (bool): whether to freeze norm, default False
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+        """
			
 
				+
			
 
				+        super(DownSample, self).__init__()
			
 
				+
			
 
				+        self.conv_bn_layer = ConvBNLayer(
			
 
				+            ch_in=ch_in,
			
 
				+            ch_out=ch_out,
			
 
				+            filter_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+        self.ch_out = ch_out
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        out = self.conv_bn_layer(inputs)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class BasicBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 data_format='NCHW'):
			
 
				+        """
			
 
				+        BasicBlock layer of DarkNet
			
 
				+
			
 
				+        Args:
			
 
				+            ch_in (int): input channel
			
 
				+            ch_out (int): output channel
			
 
				+            norm_type (str): batch norm type, default bn
			
 
				+            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
			
 
				+            freeze_norm (bool): whether to freeze norm, default False
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+        """
			
 
				+
			
 
				+        super(BasicBlock, self).__init__()
			
 
				+
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            ch_in=ch_in,
			
 
				+            ch_out=ch_out,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+        self.conv2 = ConvBNLayer(
			
 
				+            ch_in=ch_out,
			
 
				+            ch_out=ch_out * 2,
			
 
				+            filter_size=3,
			
 
				+            stride=1,
			
 
				+            padding=1,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        conv1 = self.conv1(inputs)
			
 
				+        conv2 = self.conv2(conv1)
			
 
				+        out = paddle.add(x=inputs, y=conv2)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Blocks(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 count,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 name=None,
			
 
				+                 data_format='NCHW'):
			
 
				+        """
			
 
				+        Blocks layer, which consist of some BaickBlock layers
			
 
				+
			
 
				+        Args:
			
 
				+            ch_in (int): input channel
			
 
				+            ch_out (int): output channel
			
 
				+            count (int): number of BasicBlock layer
			
 
				+            norm_type (str): batch norm type, default bn
			
 
				+            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
			
 
				+            freeze_norm (bool): whether to freeze norm, default False
			
 
				+            name (str): layer name
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+        """
			
 
				+        super(Blocks, self).__init__()
			
 
				+
			
 
				+        self.basicblock0 = BasicBlock(
			
 
				+            ch_in,
			
 
				+            ch_out,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+        self.res_out_list = []
			
 
				+        for i in range(1, count):
			
 
				+            block_name = '{}.{}'.format(name, i)
			
 
				+            res_out = self.add_sublayer(
			
 
				+                block_name,
			
 
				+                BasicBlock(
			
 
				+                    ch_out * 2,
			
 
				+                    ch_out,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    data_format=data_format))
			
 
				+            self.res_out_list.append(res_out)
			
 
				+        self.ch_out = ch_out
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self.basicblock0(inputs)
			
 
				+        for basic_block_i in self.res_out_list:
			
 
				+            y = basic_block_i(y)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class DarkNet(nn.Layer):
			
 
				+    __shared__ = ['norm_type', 'data_format']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 depth=53,
			
 
				+                 freeze_at=-1,
			
 
				+                 return_idx=[2, 3, 4],
			
 
				+                 num_stages=5,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 data_format='NCHW'):
			
 
				+        """
			
 
				+        Darknet, see https://pjreddie.com/darknet/yolo/
			
 
				+
			
 
				+        Args:
			
 
				+            depth (int): depth of network
			
 
				+            freeze_at (int): freeze the backbone at which stage
			
 
				+            filter_size (int): filter size, default 3
			
 
				+            return_idx (list): index of stages whose feature maps are returned
			
 
				+            norm_type (str): batch norm type, default bn
			
 
				+            norm_decay (str): decay for weight and bias of batch norm layer, default 0.
			
 
				+            data_format (str): data format, NCHW or NHWC
			
 
				+        """
			
 
				+        super(DarkNet, self).__init__()
			
 
				+        self.depth = depth
			
 
				+        self.freeze_at = freeze_at
			
 
				+        self.return_idx = return_idx
			
 
				+        self.num_stages = num_stages
			
 
				+        self.stages = DarkNet_cfg[self.depth][0:num_stages]
			
 
				+
			
 
				+        self.conv0 = ConvBNLayer(
			
 
				+            ch_in=3,
			
 
				+            ch_out=32,
			
 
				+            filter_size=3,
			
 
				+            stride=1,
			
 
				+            padding=1,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+
			
 
				+        self.downsample0 = DownSample(
			
 
				+            ch_in=32,
			
 
				+            ch_out=32 * 2,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            data_format=data_format)
			
 
				+
			
 
				+        self._out_channels = []
			
 
				+        self.darknet_conv_block_list = []
			
 
				+        self.downsample_list = []
			
 
				+        ch_in = [64, 128, 256, 512, 1024]
			
 
				+        for i, stage in enumerate(self.stages):
			
 
				+            name = 'stage.{}'.format(i)
			
 
				+            conv_block = self.add_sublayer(
			
 
				+                name,
			
 
				+                Blocks(
			
 
				+                    int(ch_in[i]),
			
 
				+                    32 * (2**i),
			
 
				+                    stage,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    data_format=data_format,
			
 
				+                    name=name))
			
 
				+            self.darknet_conv_block_list.append(conv_block)
			
 
				+            if i in return_idx:
			
 
				+                self._out_channels.append(64 * (2**i))
			
 
				+        for i in range(num_stages - 1):
			
 
				+            down_name = 'stage.{}.downsample'.format(i)
			
 
				+            downsample = self.add_sublayer(
			
 
				+                down_name,
			
 
				+                DownSample(
			
 
				+                    ch_in=32 * (2**(i + 1)),
			
 
				+                    ch_out=32 * (2**(i + 2)),
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    data_format=data_format))
			
 
				+            self.downsample_list.append(downsample)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+
			
 
				+        out = self.conv0(x)
			
 
				+        out = self.downsample0(out)
			
 
				+        blocks = []
			
 
				+        for i, conv_block_i in enumerate(self.darknet_conv_block_list):
			
 
				+            out = conv_block_i(out)
			
 
				+            if i == self.freeze_at:
			
 
				+                out.stop_gradient = True
			
 
				+            if i in self.return_idx:
			
 
				+                blocks.append(out)
			
 
				+            if i < self.num_stages - 1:
			
 
				+                out = self.downsample_list[i](out)
			
 
				+        return blocks
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/dla.py
+++ b/paddlers/models/ppdet/modeling/backbones/dla.py
@@ -0,0 +1,244 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddlers.models.ppdet.modeling.layers import ConvNormLayer
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+DLA_cfg = {34: ([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512])}
			
 
				+
			
 
				+
			
 
				+class BasicBlock(nn.Layer):
			
 
				+    def __init__(self, ch_in, ch_out, stride=1):
			
 
				+        super(BasicBlock, self).__init__()
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            ch_in,
			
 
				+            ch_out,
			
 
				+            filter_size=3,
			
 
				+            stride=stride,
			
 
				+            bias_on=False,
			
 
				+            norm_decay=None)
			
 
				+        self.conv2 = ConvNormLayer(
			
 
				+            ch_out,
			
 
				+            ch_out,
			
 
				+            filter_size=3,
			
 
				+            stride=1,
			
 
				+            bias_on=False,
			
 
				+            norm_decay=None)
			
 
				+
			
 
				+    def forward(self, inputs, residual=None):
			
 
				+        if residual is None:
			
 
				+            residual = inputs
			
 
				+
			
 
				+        out = self.conv1(inputs)
			
 
				+        out = F.relu(out)
			
 
				+
			
 
				+        out = self.conv2(out)
			
 
				+
			
 
				+        out = paddle.add(x=out, y=residual)
			
 
				+        out = F.relu(out)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Root(nn.Layer):
			
 
				+    def __init__(self, ch_in, ch_out, kernel_size, residual):
			
 
				+        super(Root, self).__init__()
			
 
				+        self.conv = ConvNormLayer(
			
 
				+            ch_in,
			
 
				+            ch_out,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            bias_on=False,
			
 
				+            norm_decay=None)
			
 
				+        self.residual = residual
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        children = inputs
			
 
				+        out = self.conv(paddle.concat(inputs, axis=1))
			
 
				+        if self.residual:
			
 
				+            out = paddle.add(x=out, y=children[0])
			
 
				+        out = F.relu(out)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Tree(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 level,
			
 
				+                 block,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 stride=1,
			
 
				+                 level_root=False,
			
 
				+                 root_dim=0,
			
 
				+                 root_kernel_size=1,
			
 
				+                 root_residual=False):
			
 
				+        super(Tree, self).__init__()
			
 
				+        if root_dim == 0:
			
 
				+            root_dim = 2 * ch_out
			
 
				+        if level_root:
			
 
				+            root_dim += ch_in
			
 
				+        if level == 1:
			
 
				+            self.tree1 = block(ch_in, ch_out, stride)
			
 
				+            self.tree2 = block(ch_out, ch_out, 1)
			
 
				+        else:
			
 
				+            self.tree1 = Tree(
			
 
				+                level - 1,
			
 
				+                block,
			
 
				+                ch_in,
			
 
				+                ch_out,
			
 
				+                stride,
			
 
				+                root_dim=0,
			
 
				+                root_kernel_size=root_kernel_size,
			
 
				+                root_residual=root_residual)
			
 
				+            self.tree2 = Tree(
			
 
				+                level - 1,
			
 
				+                block,
			
 
				+                ch_out,
			
 
				+                ch_out,
			
 
				+                1,
			
 
				+                root_dim=root_dim + ch_out,
			
 
				+                root_kernel_size=root_kernel_size,
			
 
				+                root_residual=root_residual)
			
 
				+
			
 
				+        if level == 1:
			
 
				+            self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
			
 
				+        self.level_root = level_root
			
 
				+        self.root_dim = root_dim
			
 
				+        self.downsample = None
			
 
				+        self.project = None
			
 
				+        self.level = level
			
 
				+        if stride > 1:
			
 
				+            self.downsample = nn.MaxPool2D(stride, stride=stride)
			
 
				+        if ch_in != ch_out:
			
 
				+            self.project = ConvNormLayer(
			
 
				+                ch_in,
			
 
				+                ch_out,
			
 
				+                filter_size=1,
			
 
				+                stride=1,
			
 
				+                bias_on=False,
			
 
				+                norm_decay=None)
			
 
				+
			
 
				+    def forward(self, x, residual=None, children=None):
			
 
				+        children = [] if children is None else children
			
 
				+        bottom = self.downsample(x) if self.downsample else x
			
 
				+        residual = self.project(bottom) if self.project else bottom
			
 
				+        if self.level_root:
			
 
				+            children.append(bottom)
			
 
				+        x1 = self.tree1(x, residual)
			
 
				+        if self.level == 1:
			
 
				+            x2 = self.tree2(x1)
			
 
				+            x = self.root([x2, x1] + children)
			
 
				+        else:
			
 
				+            children.append(x1)
			
 
				+            x = self.tree2(x1, children=children)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class DLA(nn.Layer):
			
 
				+    """
			
 
				+    DLA, see https://arxiv.org/pdf/1707.06484.pdf
			
 
				+
			
 
				+    Args:
			
 
				+        depth (int): DLA depth, should be 34.
			
 
				+        residual_root (bool): whether use a reidual layer in the root block
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, depth=34, residual_root=False):
			
 
				+        super(DLA, self).__init__()
			
 
				+        levels, channels = DLA_cfg[depth]
			
 
				+        if depth == 34:
			
 
				+            block = BasicBlock
			
 
				+        self.channels = channels
			
 
				+        self.base_layer = nn.Sequential(
			
 
				+            ConvNormLayer(
			
 
				+                3,
			
 
				+                channels[0],
			
 
				+                filter_size=7,
			
 
				+                stride=1,
			
 
				+                bias_on=False,
			
 
				+                norm_decay=None),
			
 
				+            nn.ReLU())
			
 
				+        self.level0 = self._make_conv_level(channels[0], channels[0],
			
 
				+                                            levels[0])
			
 
				+        self.level1 = self._make_conv_level(
			
 
				+            channels[0], channels[1], levels[1], stride=2)
			
 
				+        self.level2 = Tree(
			
 
				+            levels[2],
			
 
				+            block,
			
 
				+            channels[1],
			
 
				+            channels[2],
			
 
				+            2,
			
 
				+            level_root=False,
			
 
				+            root_residual=residual_root)
			
 
				+        self.level3 = Tree(
			
 
				+            levels[3],
			
 
				+            block,
			
 
				+            channels[2],
			
 
				+            channels[3],
			
 
				+            2,
			
 
				+            level_root=True,
			
 
				+            root_residual=residual_root)
			
 
				+        self.level4 = Tree(
			
 
				+            levels[4],
			
 
				+            block,
			
 
				+            channels[3],
			
 
				+            channels[4],
			
 
				+            2,
			
 
				+            level_root=True,
			
 
				+            root_residual=residual_root)
			
 
				+        self.level5 = Tree(
			
 
				+            levels[5],
			
 
				+            block,
			
 
				+            channels[4],
			
 
				+            channels[5],
			
 
				+            2,
			
 
				+            level_root=True,
			
 
				+            root_residual=residual_root)
			
 
				+
			
 
				+    def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
			
 
				+        modules = []
			
 
				+        for i in range(conv_num):
			
 
				+            modules.extend([
			
 
				+                ConvNormLayer(
			
 
				+                    ch_in,
			
 
				+                    ch_out,
			
 
				+                    filter_size=3,
			
 
				+                    stride=stride if i == 0 else 1,
			
 
				+                    bias_on=False,
			
 
				+                    norm_decay=None), nn.ReLU()
			
 
				+            ])
			
 
				+            ch_in = ch_out
			
 
				+        return nn.Sequential(*modules)
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=self.channels[i]) for i in range(6)]
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outs = []
			
 
				+        im = inputs['image']
			
 
				+        feats = self.base_layer(im)
			
 
				+        for i in range(6):
			
 
				+            feats = getattr(self, 'level{}'.format(i))(feats)
			
 
				+            outs.append(feats)
			
 
				+
			
 
				+        return outs
			
--- a/paddlers/models/ppdet/modeling/backbones/esnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/esnet.py
@@ -0,0 +1,290 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
			
 
				+from paddle.nn.initializer import KaimingNormal
			
 
				+from paddle.regularizer import L2Decay
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from paddlers.models.ppdet.modeling.ops import channel_shuffle
			
 
				+from paddlers.models.ppdet.modeling.backbones.shufflenet_v2 import ConvBNLayer
			
 
				+
			
 
				+__all__ = ['ESNet']
			
 
				+
			
 
				+
			
 
				+def make_divisible(v, divisor=16, min_value=None):
			
 
				+    if min_value is None:
			
 
				+        min_value = divisor
			
 
				+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
			
 
				+    if new_v < 0.9 * v:
			
 
				+        new_v += divisor
			
 
				+    return new_v
			
 
				+
			
 
				+
			
 
				+class SEModule(nn.Layer):
			
 
				+    def __init__(self, channel, reduction=4):
			
 
				+        super(SEModule, self).__init__()
			
 
				+        self.avg_pool = AdaptiveAvgPool2D(1)
			
 
				+        self.conv1 = Conv2D(
			
 
				+            in_channels=channel,
			
 
				+            out_channels=channel // reduction,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            weight_attr=ParamAttr(),
			
 
				+            bias_attr=ParamAttr())
			
 
				+        self.conv2 = Conv2D(
			
 
				+            in_channels=channel // reduction,
			
 
				+            out_channels=channel,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            weight_attr=ParamAttr(),
			
 
				+            bias_attr=ParamAttr())
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outputs = self.avg_pool(inputs)
			
 
				+        outputs = self.conv1(outputs)
			
 
				+        outputs = F.relu(outputs)
			
 
				+        outputs = self.conv2(outputs)
			
 
				+        outputs = F.hardsigmoid(outputs)
			
 
				+        return paddle.multiply(x=inputs, y=outputs)
			
 
				+
			
 
				+
			
 
				+class InvertedResidual(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 mid_channels,
			
 
				+                 out_channels,
			
 
				+                 stride,
			
 
				+                 act="relu"):
			
 
				+        super(InvertedResidual, self).__init__()
			
 
				+        self._conv_pw = ConvBNLayer(
			
 
				+            in_channels=in_channels // 2,
			
 
				+            out_channels=mid_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        self._conv_dw = ConvBNLayer(
			
 
				+            in_channels=mid_channels // 2,
			
 
				+            out_channels=mid_channels // 2,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=mid_channels // 2,
			
 
				+            act=None)
			
 
				+        self._se = SEModule(mid_channels)
			
 
				+
			
 
				+        self._conv_linear = ConvBNLayer(
			
 
				+            in_channels=mid_channels,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x1, x2 = paddle.split(
			
 
				+            inputs,
			
 
				+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
			
 
				+            axis=1)
			
 
				+        x2 = self._conv_pw(x2)
			
 
				+        x3 = self._conv_dw(x2)
			
 
				+        x3 = paddle.concat([x2, x3], axis=1)
			
 
				+        x3 = self._se(x3)
			
 
				+        x3 = self._conv_linear(x3)
			
 
				+        out = paddle.concat([x1, x3], axis=1)
			
 
				+        return channel_shuffle(out, 2)
			
 
				+
			
 
				+
			
 
				+class InvertedResidualDS(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 mid_channels,
			
 
				+                 out_channels,
			
 
				+                 stride,
			
 
				+                 act="relu"):
			
 
				+        super(InvertedResidualDS, self).__init__()
			
 
				+
			
 
				+        # branch1
			
 
				+        self._conv_dw_1 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=in_channels,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=in_channels,
			
 
				+            act=None)
			
 
				+        self._conv_linear_1 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        # branch2
			
 
				+        self._conv_pw_2 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=mid_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        self._conv_dw_2 = ConvBNLayer(
			
 
				+            in_channels=mid_channels // 2,
			
 
				+            out_channels=mid_channels // 2,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=mid_channels // 2,
			
 
				+            act=None)
			
 
				+        self._se = SEModule(mid_channels // 2)
			
 
				+        self._conv_linear_2 = ConvBNLayer(
			
 
				+            in_channels=mid_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        self._conv_dw_mv1 = ConvBNLayer(
			
 
				+            in_channels=out_channels,
			
 
				+            out_channels=out_channels,
			
 
				+            kernel_size=3,
			
 
				+            stride=1,
			
 
				+            padding=1,
			
 
				+            groups=out_channels,
			
 
				+            act="hard_swish")
			
 
				+        self._conv_pw_mv1 = ConvBNLayer(
			
 
				+            in_channels=out_channels,
			
 
				+            out_channels=out_channels,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act="hard_swish")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x1 = self._conv_dw_1(inputs)
			
 
				+        x1 = self._conv_linear_1(x1)
			
 
				+        x2 = self._conv_pw_2(inputs)
			
 
				+        x2 = self._conv_dw_2(x2)
			
 
				+        x2 = self._se(x2)
			
 
				+        x2 = self._conv_linear_2(x2)
			
 
				+        out = paddle.concat([x1, x2], axis=1)
			
 
				+        out = self._conv_dw_mv1(out)
			
 
				+        out = self._conv_pw_mv1(out)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class ESNet(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 scale=1.0,
			
 
				+                 act="hard_swish",
			
 
				+                 feature_maps=[4, 11, 14],
			
 
				+                 channel_ratio=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]):
			
 
				+        super(ESNet, self).__init__()
			
 
				+        self.scale = scale
			
 
				+        if isinstance(feature_maps, Integral):
			
 
				+            feature_maps = [feature_maps]
			
 
				+        self.feature_maps = feature_maps
			
 
				+        stage_repeats = [3, 7, 3]
			
 
				+
			
 
				+        stage_out_channels = [
			
 
				+            -1, 24, make_divisible(128 * scale), make_divisible(256 * scale),
			
 
				+            make_divisible(512 * scale), 1024
			
 
				+        ]
			
 
				+
			
 
				+        self._out_channels = []
			
 
				+        self._feature_idx = 0
			
 
				+        # 1. conv1
			
 
				+        self._conv1 = ConvBNLayer(
			
 
				+            in_channels=3,
			
 
				+            out_channels=stage_out_channels[1],
			
 
				+            kernel_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            act=act)
			
 
				+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
			
 
				+        self._feature_idx += 1
			
 
				+
			
 
				+        # 2. bottleneck sequences
			
 
				+        self._block_list = []
			
 
				+        arch_idx = 0
			
 
				+        for stage_id, num_repeat in enumerate(stage_repeats):
			
 
				+            for i in range(num_repeat):
			
 
				+                channels_scales = channel_ratio[arch_idx]
			
 
				+                mid_c = make_divisible(
			
 
				+                    int(stage_out_channels[stage_id + 2] * channels_scales),
			
 
				+                    divisor=8)
			
 
				+                if i == 0:
			
 
				+                    block = self.add_sublayer(
			
 
				+                        name=str(stage_id + 2) + '_' + str(i + 1),
			
 
				+                        sublayer=InvertedResidualDS(
			
 
				+                            in_channels=stage_out_channels[stage_id + 1],
			
 
				+                            mid_channels=mid_c,
			
 
				+                            out_channels=stage_out_channels[stage_id + 2],
			
 
				+                            stride=2,
			
 
				+                            act=act))
			
 
				+                else:
			
 
				+                    block = self.add_sublayer(
			
 
				+                        name=str(stage_id + 2) + '_' + str(i + 1),
			
 
				+                        sublayer=InvertedResidual(
			
 
				+                            in_channels=stage_out_channels[stage_id + 2],
			
 
				+                            mid_channels=mid_c,
			
 
				+                            out_channels=stage_out_channels[stage_id + 2],
			
 
				+                            stride=1,
			
 
				+                            act=act))
			
 
				+                self._block_list.append(block)
			
 
				+                arch_idx += 1
			
 
				+                self._feature_idx += 1
			
 
				+                self._update_out_channels(stage_out_channels[stage_id + 2],
			
 
				+                                          self._feature_idx, self.feature_maps)
			
 
				+
			
 
				+    def _update_out_channels(self, channel, feature_idx, feature_maps):
			
 
				+        if feature_idx in feature_maps:
			
 
				+            self._out_channels.append(channel)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self._conv1(inputs['image'])
			
 
				+        y = self._max_pool(y)
			
 
				+        outs = []
			
 
				+        for i, inv in enumerate(self._block_list):
			
 
				+            y = inv(y)
			
 
				+            if i + 2 in self.feature_maps:
			
 
				+                outs.append(y)
			
 
				+
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/ghostnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/ghostnet.py
@@ -0,0 +1,470 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import math
			
 
				+import paddle
			
 
				+from paddle import ParamAttr
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle.nn import AdaptiveAvgPool2D, Linear
			
 
				+from paddle.nn.initializer import Uniform
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from .mobilenet_v3 import make_divisible, ConvBNLayer
			
 
				+
			
 
				+__all__ = ['GhostNet']
			
 
				+
			
 
				+
			
 
				+class ExtraBlockDW(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_c,
			
 
				+                 ch_1,
			
 
				+                 ch_2,
			
 
				+                 stride,
			
 
				+                 lr_mult,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 name=None):
			
 
				+        super(ExtraBlockDW, self).__init__()
			
 
				+        self.pointwise_conv = ConvBNLayer(
			
 
				+            in_c=in_c,
			
 
				+            out_c=ch_1,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra1")
			
 
				+        self.depthwise_conv = ConvBNLayer(
			
 
				+            in_c=ch_1,
			
 
				+            out_c=ch_2,
			
 
				+            filter_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,  #
			
 
				+            num_groups=int(ch_1),
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra2_dw")
			
 
				+        self.normal_conv = ConvBNLayer(
			
 
				+            in_c=ch_2,
			
 
				+            out_c=ch_2,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra2_sep")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = self.pointwise_conv(inputs)
			
 
				+        x = self.depthwise_conv(x)
			
 
				+        x = self.normal_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class SEBlock(nn.Layer):
			
 
				+    def __init__(self, num_channels, lr_mult, reduction_ratio=4, name=None):
			
 
				+        super(SEBlock, self).__init__()
			
 
				+        self.pool2d_gap = AdaptiveAvgPool2D(1)
			
 
				+        self._num_channels = num_channels
			
 
				+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
			
 
				+        med_ch = num_channels // reduction_ratio
			
 
				+        self.squeeze = Linear(
			
 
				+            num_channels,
			
 
				+            med_ch,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
			
 
				+            bias_attr=ParamAttr(learning_rate=lr_mult))
			
 
				+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
			
 
				+        self.excitation = Linear(
			
 
				+            med_ch,
			
 
				+            num_channels,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
			
 
				+            bias_attr=ParamAttr(learning_rate=lr_mult))
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        pool = self.pool2d_gap(inputs)
			
 
				+        pool = paddle.squeeze(pool, axis=[2, 3])
			
 
				+        squeeze = self.squeeze(pool)
			
 
				+        squeeze = F.relu(squeeze)
			
 
				+        excitation = self.excitation(squeeze)
			
 
				+        excitation = paddle.clip(x=excitation, min=0, max=1)
			
 
				+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
			
 
				+        out = paddle.multiply(inputs, excitation)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class GhostModule(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 output_channels,
			
 
				+                 kernel_size=1,
			
 
				+                 ratio=2,
			
 
				+                 dw_size=3,
			
 
				+                 stride=1,
			
 
				+                 relu=True,
			
 
				+                 lr_mult=1.,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 name=None):
			
 
				+        super(GhostModule, self).__init__()
			
 
				+        init_channels = int(math.ceil(output_channels / ratio))
			
 
				+        new_channels = int(init_channels * (ratio - 1))
			
 
				+        self.primary_conv = ConvBNLayer(
			
 
				+            in_c=in_channels,
			
 
				+            out_c=init_channels,
			
 
				+            filter_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=int((kernel_size - 1) // 2),
			
 
				+            num_groups=1,
			
 
				+            act="relu" if relu else None,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_primary_conv")
			
 
				+        self.cheap_operation = ConvBNLayer(
			
 
				+            in_c=init_channels,
			
 
				+            out_c=new_channels,
			
 
				+            filter_size=dw_size,
			
 
				+            stride=1,
			
 
				+            padding=int((dw_size - 1) // 2),
			
 
				+            num_groups=init_channels,
			
 
				+            act="relu" if relu else None,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_cheap_operation")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = self.primary_conv(inputs)
			
 
				+        y = self.cheap_operation(x)
			
 
				+        out = paddle.concat([x, y], axis=1)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class GhostBottleneck(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 hidden_dim,
			
 
				+                 output_channels,
			
 
				+                 kernel_size,
			
 
				+                 stride,
			
 
				+                 use_se,
			
 
				+                 lr_mult,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 return_list=False,
			
 
				+                 name=None):
			
 
				+        super(GhostBottleneck, self).__init__()
			
 
				+        self._stride = stride
			
 
				+        self._use_se = use_se
			
 
				+        self._num_channels = in_channels
			
 
				+        self._output_channels = output_channels
			
 
				+        self.return_list = return_list
			
 
				+
			
 
				+        self.ghost_module_1 = GhostModule(
			
 
				+            in_channels=in_channels,
			
 
				+            output_channels=hidden_dim,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            relu=True,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_ghost_module_1")
			
 
				+        if stride == 2:
			
 
				+            self.depthwise_conv = ConvBNLayer(
			
 
				+                in_c=hidden_dim,
			
 
				+                out_c=hidden_dim,
			
 
				+                filter_size=kernel_size,
			
 
				+                stride=stride,
			
 
				+                padding=int((kernel_size - 1) // 2),
			
 
				+                num_groups=hidden_dim,
			
 
				+                act=None,
			
 
				+                lr_mult=lr_mult,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                name=name +
			
 
				+                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
			
 
				+            )
			
 
				+        if use_se:
			
 
				+            self.se_block = SEBlock(hidden_dim, lr_mult, name=name + "_se")
			
 
				+        self.ghost_module_2 = GhostModule(
			
 
				+            in_channels=hidden_dim,
			
 
				+            output_channels=output_channels,
			
 
				+            kernel_size=1,
			
 
				+            relu=False,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_ghost_module_2")
			
 
				+        if stride != 1 or in_channels != output_channels:
			
 
				+            self.shortcut_depthwise = ConvBNLayer(
			
 
				+                in_c=in_channels,
			
 
				+                out_c=in_channels,
			
 
				+                filter_size=kernel_size,
			
 
				+                stride=stride,
			
 
				+                padding=int((kernel_size - 1) // 2),
			
 
				+                num_groups=in_channels,
			
 
				+                act=None,
			
 
				+                lr_mult=lr_mult,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                name=name +
			
 
				+                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
			
 
				+            )
			
 
				+            self.shortcut_conv = ConvBNLayer(
			
 
				+                in_c=in_channels,
			
 
				+                out_c=output_channels,
			
 
				+                filter_size=1,
			
 
				+                stride=1,
			
 
				+                padding=0,
			
 
				+                num_groups=1,
			
 
				+                act=None,
			
 
				+                lr_mult=lr_mult,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                name=name + "_shortcut_conv")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self.ghost_module_1(inputs)
			
 
				+        x = y
			
 
				+        if self._stride == 2:
			
 
				+            x = self.depthwise_conv(x)
			
 
				+        if self._use_se:
			
 
				+            x = self.se_block(x)
			
 
				+        x = self.ghost_module_2(x)
			
 
				+
			
 
				+        if self._stride == 1 and self._num_channels == self._output_channels:
			
 
				+            shortcut = inputs
			
 
				+        else:
			
 
				+            shortcut = self.shortcut_depthwise(inputs)
			
 
				+            shortcut = self.shortcut_conv(shortcut)
			
 
				+        x = paddle.add(x=x, y=shortcut)
			
 
				+
			
 
				+        if self.return_list:
			
 
				+            return [y, x]
			
 
				+        else:
			
 
				+            return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class GhostNet(nn.Layer):
			
 
				+    __shared__ = ['norm_type']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 scale=1.3,
			
 
				+                 feature_maps=[6, 12, 15],
			
 
				+                 with_extra_blocks=False,
			
 
				+                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
			
 
				+                                      [64, 128]],
			
 
				+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.0,
			
 
				+                 freeze_norm=False):
			
 
				+        super(GhostNet, self).__init__()
			
 
				+        if isinstance(feature_maps, Integral):
			
 
				+            feature_maps = [feature_maps]
			
 
				+        if norm_type == 'sync_bn' and freeze_norm:
			
 
				+            raise ValueError(
			
 
				+                "The norm_type should not be sync_bn when freeze_norm is True")
			
 
				+        self.feature_maps = feature_maps
			
 
				+        self.with_extra_blocks = with_extra_blocks
			
 
				+        self.extra_block_filters = extra_block_filters
			
 
				+
			
 
				+        inplanes = 16
			
 
				+        self.cfgs = [
			
 
				+            # k, t, c, SE, s
			
 
				+            [3, 16, 16, 0, 1],
			
 
				+            [3, 48, 24, 0, 2],
			
 
				+            [3, 72, 24, 0, 1],
			
 
				+            [5, 72, 40, 1, 2],
			
 
				+            [5, 120, 40, 1, 1],
			
 
				+            [3, 240, 80, 0, 2],
			
 
				+            [3, 200, 80, 0, 1],
			
 
				+            [3, 184, 80, 0, 1],
			
 
				+            [3, 184, 80, 0, 1],
			
 
				+            [3, 480, 112, 1, 1],
			
 
				+            [3, 672, 112, 1, 1],
			
 
				+            [5, 672, 160, 1, 2],  # SSDLite output
			
 
				+            [5, 960, 160, 0, 1],
			
 
				+            [5, 960, 160, 1, 1],
			
 
				+            [5, 960, 160, 0, 1],
			
 
				+            [5, 960, 160, 1, 1]
			
 
				+        ]
			
 
				+        self.scale = scale
			
 
				+        conv1_out_ch = int(make_divisible(inplanes * self.scale, 4))
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            in_c=3,
			
 
				+            out_c=conv1_out_ch,
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            num_groups=1,
			
 
				+            act="relu",
			
 
				+            lr_mult=1.,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="conv1")
			
 
				+
			
 
				+        # build inverted residual blocks
			
 
				+        self._out_channels = []
			
 
				+        self.ghost_bottleneck_list = []
			
 
				+        idx = 0
			
 
				+        inplanes = conv1_out_ch
			
 
				+        for k, exp_size, c, use_se, s in self.cfgs:
			
 
				+            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
			
 
				+            lr_mult = lr_mult_list[lr_idx]
			
 
				+
			
 
				+            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
			
 
				+            return_list = self.with_extra_blocks and idx + 2 in self.feature_maps
			
 
				+
			
 
				+            ghost_bottleneck = self.add_sublayer(
			
 
				+                "_ghostbottleneck_" + str(idx),
			
 
				+                sublayer=GhostBottleneck(
			
 
				+                    in_channels=inplanes,
			
 
				+                    hidden_dim=int(make_divisible(exp_size * self.scale, 4)),
			
 
				+                    output_channels=int(make_divisible(c * self.scale, 4)),
			
 
				+                    kernel_size=k,
			
 
				+                    stride=s,
			
 
				+                    use_se=use_se,
			
 
				+                    lr_mult=lr_mult,
			
 
				+                    conv_decay=conv_decay,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    return_list=return_list,
			
 
				+                    name="_ghostbottleneck_" + str(idx)))
			
 
				+            self.ghost_bottleneck_list.append(ghost_bottleneck)
			
 
				+            inplanes = int(make_divisible(c * self.scale, 4))
			
 
				+            idx += 1
			
 
				+            self._update_out_channels(
			
 
				+                int(make_divisible(exp_size * self.scale, 4))
			
 
				+                if return_list else inplanes, idx + 1, feature_maps)
			
 
				+
			
 
				+        if self.with_extra_blocks:
			
 
				+            self.extra_block_list = []
			
 
				+            extra_out_c = int(make_divisible(self.scale * self.cfgs[-1][1], 4))
			
 
				+            lr_idx = min(idx // 3, len(lr_mult_list) - 1)
			
 
				+            lr_mult = lr_mult_list[lr_idx]
			
 
				+
			
 
				+            conv_extra = self.add_sublayer(
			
 
				+                "conv" + str(idx + 2),
			
 
				+                sublayer=ConvBNLayer(
			
 
				+                    in_c=inplanes,
			
 
				+                    out_c=extra_out_c,
			
 
				+                    filter_size=1,
			
 
				+                    stride=1,
			
 
				+                    padding=0,
			
 
				+                    num_groups=1,
			
 
				+                    act="relu6",
			
 
				+                    lr_mult=lr_mult,
			
 
				+                    conv_decay=conv_decay,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    name="conv" + str(idx + 2)))
			
 
				+            self.extra_block_list.append(conv_extra)
			
 
				+            idx += 1
			
 
				+            self._update_out_channels(extra_out_c, idx + 1, feature_maps)
			
 
				+
			
 
				+            for j, block_filter in enumerate(self.extra_block_filters):
			
 
				+                in_c = extra_out_c if j == 0 else self.extra_block_filters[
			
 
				+                    j - 1][1]
			
 
				+                conv_extra = self.add_sublayer(
			
 
				+                    "conv" + str(idx + 2),
			
 
				+                    sublayer=ExtraBlockDW(
			
 
				+                        in_c,
			
 
				+                        block_filter[0],
			
 
				+                        block_filter[1],
			
 
				+                        stride=2,
			
 
				+                        lr_mult=lr_mult,
			
 
				+                        conv_decay=conv_decay,
			
 
				+                        norm_type=norm_type,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        name='conv' + str(idx + 2)))
			
 
				+                self.extra_block_list.append(conv_extra)
			
 
				+                idx += 1
			
 
				+                self._update_out_channels(block_filter[1], idx + 1,
			
 
				+                                          feature_maps)
			
 
				+
			
 
				+    def _update_out_channels(self, channel, feature_idx, feature_maps):
			
 
				+        if feature_idx in feature_maps:
			
 
				+            self._out_channels.append(channel)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = self.conv1(inputs['image'])
			
 
				+        outs = []
			
 
				+        for idx, ghost_bottleneck in enumerate(self.ghost_bottleneck_list):
			
 
				+            x = ghost_bottleneck(x)
			
 
				+            if idx + 2 in self.feature_maps:
			
 
				+                if isinstance(x, list):
			
 
				+                    outs.append(x[0])
			
 
				+                    x = x[1]
			
 
				+                else:
			
 
				+                    outs.append(x)
			
 
				+
			
 
				+        if not self.with_extra_blocks:
			
 
				+            return outs
			
 
				+
			
 
				+        for i, block in enumerate(self.extra_block_list):
			
 
				+            idx = i + len(self.ghost_bottleneck_list)
			
 
				+            x = block(x)
			
 
				+            if idx + 2 in self.feature_maps:
			
 
				+                outs.append(x)
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/hardnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/hardnet.py
@@ -0,0 +1,224 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['HarDNet']
			
 
				+
			
 
				+
			
 
				+def ConvLayer(in_channels,
			
 
				+              out_channels,
			
 
				+              kernel_size=3,
			
 
				+              stride=1,
			
 
				+              bias_attr=False):
			
 
				+    layer = nn.Sequential(
			
 
				+        ('conv', nn.Conv2D(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            kernel_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=kernel_size // 2,
			
 
				+            groups=1,
			
 
				+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
			
 
				+        ('relu', nn.ReLU6()))
			
 
				+    return layer
			
 
				+
			
 
				+
			
 
				+def DWConvLayer(in_channels,
			
 
				+                out_channels,
			
 
				+                kernel_size=3,
			
 
				+                stride=1,
			
 
				+                bias_attr=False):
			
 
				+    layer = nn.Sequential(
			
 
				+        ('dwconv', nn.Conv2D(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            kernel_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=out_channels,
			
 
				+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
			
 
				+    return layer
			
 
				+
			
 
				+
			
 
				+def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
			
 
				+    layer = nn.Sequential(
			
 
				+        ('layer1', ConvLayer(
			
 
				+            in_channels, out_channels, kernel_size=kernel_size)),
			
 
				+        ('layer2', DWConvLayer(
			
 
				+            out_channels, out_channels, stride=stride)))
			
 
				+    return layer
			
 
				+
			
 
				+
			
 
				+class HarDBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 growth_rate,
			
 
				+                 grmul,
			
 
				+                 n_layers,
			
 
				+                 keepBase=False,
			
 
				+                 residual_out=False,
			
 
				+                 dwconv=False):
			
 
				+        super().__init__()
			
 
				+        self.keepBase = keepBase
			
 
				+        self.links = []
			
 
				+        layers_ = []
			
 
				+        self.out_channels = 0
			
 
				+        for i in range(n_layers):
			
 
				+            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
			
 
				+                                              grmul)
			
 
				+            self.links.append(link)
			
 
				+            if dwconv:
			
 
				+                layers_.append(CombConvLayer(inch, outch))
			
 
				+            else:
			
 
				+                layers_.append(ConvLayer(inch, outch))
			
 
				+
			
 
				+            if (i % 2 == 0) or (i == n_layers - 1):
			
 
				+                self.out_channels += outch
			
 
				+        self.layers = nn.LayerList(layers_)
			
 
				+
			
 
				+    def get_out_ch(self):
			
 
				+        return self.out_channels
			
 
				+
			
 
				+    def get_link(self, layer, base_ch, growth_rate, grmul):
			
 
				+        if layer == 0:
			
 
				+            return base_ch, 0, []
			
 
				+        out_channels = growth_rate
			
 
				+
			
 
				+        link = []
			
 
				+        for i in range(10):
			
 
				+            dv = 2**i
			
 
				+            if layer % dv == 0:
			
 
				+                k = layer - dv
			
 
				+                link.append(k)
			
 
				+                if i > 0:
			
 
				+                    out_channels *= grmul
			
 
				+
			
 
				+        out_channels = int(int(out_channels + 1) / 2) * 2
			
 
				+        in_channels = 0
			
 
				+
			
 
				+        for i in link:
			
 
				+            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
			
 
				+            in_channels += ch
			
 
				+
			
 
				+        return out_channels, in_channels, link
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        layers_ = [x]
			
 
				+
			
 
				+        for layer in range(len(self.layers)):
			
 
				+            link = self.links[layer]
			
 
				+            tin = []
			
 
				+            for i in link:
			
 
				+                tin.append(layers_[i])
			
 
				+            if len(tin) > 1:
			
 
				+                x = paddle.concat(tin, 1)
			
 
				+            else:
			
 
				+                x = tin[0]
			
 
				+            out = self.layers[layer](x)
			
 
				+            layers_.append(out)
			
 
				+
			
 
				+        t = len(layers_)
			
 
				+        out_ = []
			
 
				+        for i in range(t):
			
 
				+            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
			
 
				+                out_.append(layers_[i])
			
 
				+        out = paddle.concat(out_, 1)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class HarDNet(nn.Layer):
			
 
				+    def __init__(self, depth_wise=False, return_idx=[1, 3, 8, 13], arch=85):
			
 
				+        super(HarDNet, self).__init__()
			
 
				+        assert arch in [39, 68, 85], "HarDNet-{} not support.".format(arch)
			
 
				+        if arch == 85:
			
 
				+            first_ch = [48, 96]
			
 
				+            second_kernel = 3
			
 
				+            ch_list = [192, 256, 320, 480, 720]
			
 
				+            grmul = 1.7
			
 
				+            gr = [24, 24, 28, 36, 48]
			
 
				+            n_layers = [8, 16, 16, 16, 16]
			
 
				+        elif arch == 68:
			
 
				+            first_ch = [32, 64]
			
 
				+            second_kernel = 3
			
 
				+            ch_list = [128, 256, 320, 640]
			
 
				+            grmul = 1.7
			
 
				+            gr = [14, 16, 20, 40]
			
 
				+            n_layers = [8, 16, 16, 16]
			
 
				+
			
 
				+        self.return_idx = return_idx
			
 
				+        self._out_channels = [96, 214, 458, 784]
			
 
				+
			
 
				+        avg_pool = True
			
 
				+        if depth_wise:
			
 
				+            second_kernel = 1
			
 
				+            avg_pool = False
			
 
				+
			
 
				+        blks = len(n_layers)
			
 
				+        self.base = nn.LayerList([])
			
 
				+
			
 
				+        # First Layer: Standard Conv3x3, Stride=2
			
 
				+        self.base.append(
			
 
				+            ConvLayer(
			
 
				+                in_channels=3,
			
 
				+                out_channels=first_ch[0],
			
 
				+                kernel_size=3,
			
 
				+                stride=2,
			
 
				+                bias_attr=False))
			
 
				+
			
 
				+        # Second Layer
			
 
				+        self.base.append(
			
 
				+            ConvLayer(
			
 
				+                first_ch[0], first_ch[1], kernel_size=second_kernel))
			
 
				+
			
 
				+        # Avgpooling or DWConv3x3 downsampling
			
 
				+        if avg_pool:
			
 
				+            self.base.append(nn.AvgPool2D(kernel_size=3, stride=2, padding=1))
			
 
				+        else:
			
 
				+            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
			
 
				+
			
 
				+        # Build all HarDNet blocks
			
 
				+        ch = first_ch[1]
			
 
				+        for i in range(blks):
			
 
				+            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
			
 
				+            ch = blk.out_channels
			
 
				+            self.base.append(blk)
			
 
				+
			
 
				+            if i != blks - 1:
			
 
				+                self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
			
 
				+            ch = ch_list[i]
			
 
				+            if i == 0:
			
 
				+                self.base.append(
			
 
				+                    nn.AvgPool2D(
			
 
				+                        kernel_size=2, stride=2, ceil_mode=True))
			
 
				+            elif i != blks - 1 and i != 1 and i != 3:
			
 
				+                self.base.append(nn.AvgPool2D(kernel_size=2, stride=2))
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        outs = []
			
 
				+        for i, layer in enumerate(self.base):
			
 
				+            x = layer(x)
			
 
				+            if i in self.return_idx:
			
 
				+                outs.append(x)
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=self._out_channels[i]) for i in range(4)]
			
--- a/paddlers/models/ppdet/modeling/backbones/hrnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/hrnet.py
@@ -0,0 +1,727 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle.nn import AdaptiveAvgPool2D, Linear
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn.initializer import Normal, Uniform
			
 
				+from numbers import Integral
			
 
				+import math
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['HRNet']
			
 
				+
			
 
				+
			
 
				+class ConvNormLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size,
			
 
				+                 stride=1,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_groups=32,
			
 
				+                 use_dcn=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 act=None,
			
 
				+                 name=None):
			
 
				+        super(ConvNormLayer, self).__init__()
			
 
				+        assert norm_type in ['bn', 'sync_bn', 'gn']
			
 
				+
			
 
				+        self.act = act
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels=ch_in,
			
 
				+            out_channels=ch_out,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=(filter_size - 1) // 2,
			
 
				+            groups=1,
			
 
				+            weight_attr=ParamAttr(initializer=Normal(
			
 
				+                mean=0., std=0.01)),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        norm_lr = 0. if freeze_norm else 1.
			
 
				+
			
 
				+        param_attr = ParamAttr(
			
 
				+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
			
 
				+        bias_attr = ParamAttr(
			
 
				+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
			
 
				+        global_stats = True if freeze_norm else None
			
 
				+        if norm_type in ['bn', 'sync_bn']:
			
 
				+            self.norm = nn.BatchNorm2D(
			
 
				+                ch_out,
			
 
				+                weight_attr=param_attr,
			
 
				+                bias_attr=bias_attr,
			
 
				+                use_global_stats=global_stats)
			
 
				+        elif norm_type == 'gn':
			
 
				+            self.norm = nn.GroupNorm(
			
 
				+                num_groups=norm_groups,
			
 
				+                num_channels=ch_out,
			
 
				+                weight_attr=param_attr,
			
 
				+                bias_attr=bias_attr)
			
 
				+        norm_params = self.norm.parameters()
			
 
				+        if freeze_norm:
			
 
				+            for param in norm_params:
			
 
				+                param.stop_gradient = True
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        out = self.conv(inputs)
			
 
				+        out = self.norm(out)
			
 
				+
			
 
				+        if self.act == 'relu':
			
 
				+            out = F.relu(out)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Layer1(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 has_se=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(Layer1, self).__init__()
			
 
				+
			
 
				+        self.bottleneck_block_list = []
			
 
				+
			
 
				+        for i in range(4):
			
 
				+            bottleneck_block = self.add_sublayer(
			
 
				+                "block_{}_{}".format(name, i + 1),
			
 
				+                BottleneckBlock(
			
 
				+                    num_channels=num_channels if i == 0 else 256,
			
 
				+                    num_filters=64,
			
 
				+                    has_se=has_se,
			
 
				+                    stride=1,
			
 
				+                    downsample=True if i == 0 else False,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    name=name + '_' + str(i + 1)))
			
 
				+            self.bottleneck_block_list.append(bottleneck_block)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        conv = input
			
 
				+        for block_func in self.bottleneck_block_list:
			
 
				+            conv = block_func(conv)
			
 
				+        return conv
			
 
				+
			
 
				+
			
 
				+class TransitionLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(TransitionLayer, self).__init__()
			
 
				+
			
 
				+        num_in = len(in_channels)
			
 
				+        num_out = len(out_channels)
			
 
				+        out = []
			
 
				+        self.conv_bn_func_list = []
			
 
				+        for i in range(num_out):
			
 
				+            residual = None
			
 
				+            if i < num_in:
			
 
				+                if in_channels[i] != out_channels[i]:
			
 
				+                    residual = self.add_sublayer(
			
 
				+                        "transition_{}_layer_{}".format(name, i + 1),
			
 
				+                        ConvNormLayer(
			
 
				+                            ch_in=in_channels[i],
			
 
				+                            ch_out=out_channels[i],
			
 
				+                            filter_size=3,
			
 
				+                            norm_decay=norm_decay,
			
 
				+                            freeze_norm=freeze_norm,
			
 
				+                            act='relu',
			
 
				+                            name=name + '_layer_' + str(i + 1)))
			
 
				+            else:
			
 
				+                residual = self.add_sublayer(
			
 
				+                    "transition_{}_layer_{}".format(name, i + 1),
			
 
				+                    ConvNormLayer(
			
 
				+                        ch_in=in_channels[-1],
			
 
				+                        ch_out=out_channels[i],
			
 
				+                        filter_size=3,
			
 
				+                        stride=2,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        act='relu',
			
 
				+                        name=name + '_layer_' + str(i + 1)))
			
 
				+            self.conv_bn_func_list.append(residual)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        outs = []
			
 
				+        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
			
 
				+            if conv_bn_func is None:
			
 
				+                outs.append(input[idx])
			
 
				+            else:
			
 
				+                if idx < len(input):
			
 
				+                    outs.append(conv_bn_func(input[idx]))
			
 
				+                else:
			
 
				+                    outs.append(conv_bn_func(input[-1]))
			
 
				+        return outs
			
 
				+
			
 
				+
			
 
				+class Branches(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 block_num,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 has_se=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(Branches, self).__init__()
			
 
				+
			
 
				+        self.basic_block_list = []
			
 
				+        for i in range(len(out_channels)):
			
 
				+            self.basic_block_list.append([])
			
 
				+            for j in range(block_num):
			
 
				+                in_ch = in_channels[i] if j == 0 else out_channels[i]
			
 
				+                basic_block_func = self.add_sublayer(
			
 
				+                    "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
			
 
				+                    BasicBlock(
			
 
				+                        num_channels=in_ch,
			
 
				+                        num_filters=out_channels[i],
			
 
				+                        has_se=has_se,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        name=name + '_branch_layer_' + str(i + 1) + '_' +
			
 
				+                        str(j + 1)))
			
 
				+                self.basic_block_list[i].append(basic_block_func)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outs = []
			
 
				+        for idx, input in enumerate(inputs):
			
 
				+            conv = input
			
 
				+            basic_block_list = self.basic_block_list[idx]
			
 
				+            for basic_block_func in basic_block_list:
			
 
				+                conv = basic_block_func(conv)
			
 
				+            outs.append(conv)
			
 
				+        return outs
			
 
				+
			
 
				+
			
 
				+class BottleneckBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 num_filters,
			
 
				+                 has_se,
			
 
				+                 stride=1,
			
 
				+                 downsample=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(BottleneckBlock, self).__init__()
			
 
				+
			
 
				+        self.has_se = has_se
			
 
				+        self.downsample = downsample
			
 
				+
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            ch_in=num_channels,
			
 
				+            ch_out=num_filters,
			
 
				+            filter_size=1,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            act="relu",
			
 
				+            name=name + "_conv1")
			
 
				+        self.conv2 = ConvNormLayer(
			
 
				+            ch_in=num_filters,
			
 
				+            ch_out=num_filters,
			
 
				+            filter_size=3,
			
 
				+            stride=stride,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            act="relu",
			
 
				+            name=name + "_conv2")
			
 
				+        self.conv3 = ConvNormLayer(
			
 
				+            ch_in=num_filters,
			
 
				+            ch_out=num_filters * 4,
			
 
				+            filter_size=1,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            act=None,
			
 
				+            name=name + "_conv3")
			
 
				+
			
 
				+        if self.downsample:
			
 
				+            self.conv_down = ConvNormLayer(
			
 
				+                ch_in=num_channels,
			
 
				+                ch_out=num_filters * 4,
			
 
				+                filter_size=1,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                act=None,
			
 
				+                name=name + "_downsample")
			
 
				+
			
 
				+        if self.has_se:
			
 
				+            self.se = SELayer(
			
 
				+                num_channels=num_filters * 4,
			
 
				+                num_filters=num_filters * 4,
			
 
				+                reduction_ratio=16,
			
 
				+                name='fc' + name)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        residual = input
			
 
				+        conv1 = self.conv1(input)
			
 
				+        conv2 = self.conv2(conv1)
			
 
				+        conv3 = self.conv3(conv2)
			
 
				+
			
 
				+        if self.downsample:
			
 
				+            residual = self.conv_down(input)
			
 
				+
			
 
				+        if self.has_se:
			
 
				+            conv3 = self.se(conv3)
			
 
				+
			
 
				+        y = paddle.add(x=residual, y=conv3)
			
 
				+        y = F.relu(y)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+class BasicBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 num_filters,
			
 
				+                 stride=1,
			
 
				+                 has_se=False,
			
 
				+                 downsample=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(BasicBlock, self).__init__()
			
 
				+
			
 
				+        self.has_se = has_se
			
 
				+        self.downsample = downsample
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            ch_in=num_channels,
			
 
				+            ch_out=num_filters,
			
 
				+            filter_size=3,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            stride=stride,
			
 
				+            act="relu",
			
 
				+            name=name + "_conv1")
			
 
				+        self.conv2 = ConvNormLayer(
			
 
				+            ch_in=num_filters,
			
 
				+            ch_out=num_filters,
			
 
				+            filter_size=3,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            stride=1,
			
 
				+            act=None,
			
 
				+            name=name + "_conv2")
			
 
				+
			
 
				+        if self.downsample:
			
 
				+            self.conv_down = ConvNormLayer(
			
 
				+                ch_in=num_channels,
			
 
				+                ch_out=num_filters * 4,
			
 
				+                filter_size=1,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                act=None,
			
 
				+                name=name + "_downsample")
			
 
				+
			
 
				+        if self.has_se:
			
 
				+            self.se = SELayer(
			
 
				+                num_channels=num_filters,
			
 
				+                num_filters=num_filters,
			
 
				+                reduction_ratio=16,
			
 
				+                name='fc' + name)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        residual = input
			
 
				+        conv1 = self.conv1(input)
			
 
				+        conv2 = self.conv2(conv1)
			
 
				+
			
 
				+        if self.downsample:
			
 
				+            residual = self.conv_down(input)
			
 
				+
			
 
				+        if self.has_se:
			
 
				+            conv2 = self.se(conv2)
			
 
				+
			
 
				+        y = paddle.add(x=residual, y=conv2)
			
 
				+        y = F.relu(y)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+class SELayer(nn.Layer):
			
 
				+    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
			
 
				+        super(SELayer, self).__init__()
			
 
				+
			
 
				+        self.pool2d_gap = AdaptiveAvgPool2D(1)
			
 
				+
			
 
				+        self._num_channels = num_channels
			
 
				+
			
 
				+        med_ch = int(num_channels / reduction_ratio)
			
 
				+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
			
 
				+        self.squeeze = Linear(
			
 
				+            num_channels,
			
 
				+            med_ch,
			
 
				+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
			
 
				+
			
 
				+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
			
 
				+        self.excitation = Linear(
			
 
				+            med_ch,
			
 
				+            num_filters,
			
 
				+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        pool = self.pool2d_gap(input)
			
 
				+        pool = paddle.squeeze(pool, axis=[2, 3])
			
 
				+        squeeze = self.squeeze(pool)
			
 
				+        squeeze = F.relu(squeeze)
			
 
				+        excitation = self.excitation(squeeze)
			
 
				+        excitation = F.sigmoid(excitation)
			
 
				+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
			
 
				+        out = input * excitation
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Stage(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 num_modules,
			
 
				+                 num_filters,
			
 
				+                 has_se=False,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 multi_scale_output=True,
			
 
				+                 name=None):
			
 
				+        super(Stage, self).__init__()
			
 
				+
			
 
				+        self._num_modules = num_modules
			
 
				+        self.stage_func_list = []
			
 
				+        for i in range(num_modules):
			
 
				+            if i == num_modules - 1 and not multi_scale_output:
			
 
				+                stage_func = self.add_sublayer(
			
 
				+                    "stage_{}_{}".format(name, i + 1),
			
 
				+                    HighResolutionModule(
			
 
				+                        num_channels=num_channels,
			
 
				+                        num_filters=num_filters,
			
 
				+                        has_se=has_se,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        multi_scale_output=False,
			
 
				+                        name=name + '_' + str(i + 1)))
			
 
				+            else:
			
 
				+                stage_func = self.add_sublayer(
			
 
				+                    "stage_{}_{}".format(name, i + 1),
			
 
				+                    HighResolutionModule(
			
 
				+                        num_channels=num_channels,
			
 
				+                        num_filters=num_filters,
			
 
				+                        has_se=has_se,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        name=name + '_' + str(i + 1)))
			
 
				+
			
 
				+            self.stage_func_list.append(stage_func)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        out = input
			
 
				+        for idx in range(self._num_modules):
			
 
				+            out = self.stage_func_list[idx](out)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class HighResolutionModule(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 num_filters,
			
 
				+                 has_se=False,
			
 
				+                 multi_scale_output=True,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(HighResolutionModule, self).__init__()
			
 
				+        self.branches_func = Branches(
			
 
				+            block_num=4,
			
 
				+            in_channels=num_channels,
			
 
				+            out_channels=num_filters,
			
 
				+            has_se=has_se,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name)
			
 
				+
			
 
				+        self.fuse_func = FuseLayers(
			
 
				+            in_channels=num_filters,
			
 
				+            out_channels=num_filters,
			
 
				+            multi_scale_output=multi_scale_output,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name)
			
 
				+
			
 
				+    def forward(self, input):
			
 
				+        out = self.branches_func(input)
			
 
				+        out = self.fuse_func(out)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class FuseLayers(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 multi_scale_output=True,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 name=None):
			
 
				+        super(FuseLayers, self).__init__()
			
 
				+
			
 
				+        self._actual_ch = len(in_channels) if multi_scale_output else 1
			
 
				+        self._in_channels = in_channels
			
 
				+
			
 
				+        self.residual_func_list = []
			
 
				+        for i in range(self._actual_ch):
			
 
				+            for j in range(len(in_channels)):
			
 
				+                residual_func = None
			
 
				+                if j > i:
			
 
				+                    residual_func = self.add_sublayer(
			
 
				+                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
			
 
				+                        ConvNormLayer(
			
 
				+                            ch_in=in_channels[j],
			
 
				+                            ch_out=out_channels[i],
			
 
				+                            filter_size=1,
			
 
				+                            stride=1,
			
 
				+                            act=None,
			
 
				+                            norm_decay=norm_decay,
			
 
				+                            freeze_norm=freeze_norm,
			
 
				+                            name=name + '_layer_' + str(i + 1) + '_' +
			
 
				+                            str(j + 1)))
			
 
				+                    self.residual_func_list.append(residual_func)
			
 
				+                elif j < i:
			
 
				+                    pre_num_filters = in_channels[j]
			
 
				+                    for k in range(i - j):
			
 
				+                        if k == i - j - 1:
			
 
				+                            residual_func = self.add_sublayer(
			
 
				+                                "residual_{}_layer_{}_{}_{}".format(
			
 
				+                                    name, i + 1, j + 1, k + 1),
			
 
				+                                ConvNormLayer(
			
 
				+                                    ch_in=pre_num_filters,
			
 
				+                                    ch_out=out_channels[i],
			
 
				+                                    filter_size=3,
			
 
				+                                    stride=2,
			
 
				+                                    norm_decay=norm_decay,
			
 
				+                                    freeze_norm=freeze_norm,
			
 
				+                                    act=None,
			
 
				+                                    name=name + '_layer_' + str(i + 1) + '_' +
			
 
				+                                    str(j + 1) + '_' + str(k + 1)))
			
 
				+                            pre_num_filters = out_channels[i]
			
 
				+                        else:
			
 
				+                            residual_func = self.add_sublayer(
			
 
				+                                "residual_{}_layer_{}_{}_{}".format(
			
 
				+                                    name, i + 1, j + 1, k + 1),
			
 
				+                                ConvNormLayer(
			
 
				+                                    ch_in=pre_num_filters,
			
 
				+                                    ch_out=out_channels[j],
			
 
				+                                    filter_size=3,
			
 
				+                                    stride=2,
			
 
				+                                    norm_decay=norm_decay,
			
 
				+                                    freeze_norm=freeze_norm,
			
 
				+                                    act="relu",
			
 
				+                                    name=name + '_layer_' + str(i + 1) + '_' +
			
 
				+                                    str(j + 1) + '_' + str(k + 1)))
			
 
				+                            pre_num_filters = out_channels[j]
			
 
				+                        self.residual_func_list.append(residual_func)
			
 
				+
			
 
    def forward(self, input):
        """Fuse the branch feature maps into ``self._actual_ch`` outputs.

        ``self.residual_func_list`` is a flat list filled by ``__init__`` in
        (i, j, k) order, so it is consumed here with a running cursor that
        walks the same loop nest.

        Args:
            input (list): one feature map per entry of ``self._in_channels``.

        Returns:
            list: ``self._actual_ch`` fused feature maps, each passed through
                ReLU.
        """
        fused = []
        cursor = 0  # position of the next unused layer in residual_func_list
        branch_count = len(self._in_channels)
        for i in range(self._actual_ch):
            acc = input[i]
            for j in range(branch_count):
                if j == i:
                    # The target branch itself is the initial accumulator.
                    continue
                if j > i:
                    # Branch j goes through one registered layer, then is
                    # enlarged by a factor of 2**(j - i).
                    y = self.residual_func_list[cursor](input[j])
                    cursor += 1
                    y = F.interpolate(y, scale_factor=2**(j - i))
                else:
                    # Branch j goes through a chain of (i - j) registered
                    # layers, applied in the order they were created.
                    y = input[j]
                    for _ in range(i - j):
                        y = self.residual_func_list[cursor](y)
                        cursor += 1
                acc = paddle.add(x=acc, y=y)
            fused.append(F.relu(acc))

        return fused
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class HRNet(nn.Layer):
			
 
				+    """
			
 
				+    HRNet, see https://arxiv.org/abs/1908.07919
			
 
				+
			
 
				+    Args:
			
 
				+        width (int): the width of HRNet
			
 
				+        has_se (bool): whether to add SE block for each stage
			
 
				+        freeze_at (int): the stage to freeze
			
 
				+        freeze_norm (bool): whether to freeze norm in HRNet
			
 
				+        norm_decay (float): weight decay for normalization layer weights
			
 
				+        return_idx (List): the stage to return
			
 
				+        upsample (bool): whether to upsample and concat the backbone feats
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 width=18,
			
 
				+                 has_se=False,
			
 
				+                 freeze_at=0,
			
 
				+                 freeze_norm=True,
			
 
				+                 norm_decay=0.,
			
 
				+                 return_idx=[0, 1, 2, 3],
			
 
				+                 upsample=False):
			
 
				+        super(HRNet, self).__init__()
			
 
				+
			
 
				+        self.width = width
			
 
				+        self.has_se = has_se
			
 
				+        if isinstance(return_idx, Integral):
			
 
				+            return_idx = [return_idx]
			
 
				+
			
 
				+        assert len(return_idx) > 0, "need one or more return index"
			
 
				+        self.freeze_at = freeze_at
			
 
				+        self.return_idx = return_idx
			
 
				+        self.upsample = upsample
			
 
				+
			
 
				+        self.channels = {
			
 
				+            18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
			
 
				+            30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
			
 
				+            32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
			
 
				+            40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
			
 
				+            44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
			
 
				+            48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
			
 
				+            60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
			
 
				+            64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
			
 
				+        }
			
 
				+
			
 
				+        channels_2, channels_3, channels_4 = self.channels[width]
			
 
				+        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
			
 
				+        self._out_channels = [sum(channels_4)] if self.upsample else channels_4
			
 
				+        self._out_strides = [4] if self.upsample else [4, 8, 16, 32]
			
 
				+
			
 
				+        self.conv_layer1_1 = ConvNormLayer(
			
 
				+            ch_in=3,
			
 
				+            ch_out=64,
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            act='relu',
			
 
				+            name="layer1_1")
			
 
				+
			
 
				+        self.conv_layer1_2 = ConvNormLayer(
			
 
				+            ch_in=64,
			
 
				+            ch_out=64,
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            act='relu',
			
 
				+            name="layer1_2")
			
 
				+
			
 
				+        self.la1 = Layer1(
			
 
				+            num_channels=64,
			
 
				+            has_se=has_se,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="layer2")
			
 
				+
			
 
				+        self.tr1 = TransitionLayer(
			
 
				+            in_channels=[256],
			
 
				+            out_channels=channels_2,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="tr1")
			
 
				+
			
 
				+        self.st2 = Stage(
			
 
				+            num_channels=channels_2,
			
 
				+            num_modules=num_modules_2,
			
 
				+            num_filters=channels_2,
			
 
				+            has_se=self.has_se,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="st2")
			
 
				+
			
 
				+        self.tr2 = TransitionLayer(
			
 
				+            in_channels=channels_2,
			
 
				+            out_channels=channels_3,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="tr2")
			
 
				+
			
 
				+        self.st3 = Stage(
			
 
				+            num_channels=channels_3,
			
 
				+            num_modules=num_modules_3,
			
 
				+            num_filters=channels_3,
			
 
				+            has_se=self.has_se,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="st3")
			
 
				+
			
 
				+        self.tr3 = TransitionLayer(
			
 
				+            in_channels=channels_3,
			
 
				+            out_channels=channels_4,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="tr3")
			
 
				+        self.st4 = Stage(
			
 
				+            num_channels=channels_4,
			
 
				+            num_modules=num_modules_4,
			
 
				+            num_filters=channels_4,
			
 
				+            has_se=self.has_se,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            multi_scale_output=len(return_idx) > 1,
			
 
				+            name="st4")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        conv1 = self.conv_layer1_1(x)
			
 
				+        conv2 = self.conv_layer1_2(conv1)
			
 
				+
			
 
				+        la1 = self.la1(conv2)
			
 
				+        tr1 = self.tr1([la1])
			
 
				+        st2 = self.st2(tr1)
			
 
				+        tr2 = self.tr2(st2)
			
 
				+
			
 
				+        st3 = self.st3(tr2)
			
 
				+        tr3 = self.tr3(st3)
			
 
				+
			
 
				+        st4 = self.st4(tr3)
			
 
				+
			
 
				+        if self.upsample:
			
 
				+            # Upsampling
			
 
				+            x0_h, x0_w = st4[0].shape[2:4]
			
 
				+            x1 = F.upsample(st4[1], size=(x0_h, x0_w), mode='bilinear')
			
 
				+            x2 = F.upsample(st4[2], size=(x0_h, x0_w), mode='bilinear')
			
 
				+            x3 = F.upsample(st4[3], size=(x0_h, x0_w), mode='bilinear')
			
 
				+            x = paddle.concat([st4[0], x1, x2, x3], 1)
			
 
				+            return x
			
 
				+
			
 
				+        res = []
			
 
				+        for i, layer in enumerate(st4):
			
 
				+            if i == self.freeze_at:
			
 
				+                layer.stop_gradient = True
			
 
				+            if i in self.return_idx:
			
 
				+                res.append(layer)
			
 
				+
			
 
				+        return res
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        if self.upsample:
			
 
				+            self.return_idx = [0]
			
 
				+        return [
			
 
				+            ShapeSpec(
			
 
				+                channels=self._out_channels[i], stride=self._out_strides[i])
			
 
				+            for i in self.return_idx
			
 
				+        ]
			
--- a/paddlers/models/ppdet/modeling/backbones/lcnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/lcnet.py
@@ -0,0 +1,259 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn import AdaptiveAvgPool2D, Conv2D
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import KaimingNormal
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['LCNet']
			
 
				+
			
 
				+NET_CONFIG = {
			
 
				+    "blocks2":
			
 
				+    #k, in_c, out_c, s, use_se
			
 
				+    [[3, 16, 32, 1, False], ],
			
 
				+    "blocks3": [
			
 
				+        [3, 32, 64, 2, False],
			
 
				+        [3, 64, 64, 1, False],
			
 
				+    ],
			
 
				+    "blocks4": [
			
 
				+        [3, 64, 128, 2, False],
			
 
				+        [3, 128, 128, 1, False],
			
 
				+    ],
			
 
				+    "blocks5": [
			
 
				+        [3, 128, 256, 2, False],
			
 
				+        [5, 256, 256, 1, False],
			
 
				+        [5, 256, 256, 1, False],
			
 
				+        [5, 256, 256, 1, False],
			
 
				+        [5, 256, 256, 1, False],
			
 
				+        [5, 256, 256, 1, False],
			
 
				+    ],
			
 
				+    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def make_divisible(v, divisor=8, min_value=None):
			
 
				+    if min_value is None:
			
 
				+        min_value = divisor
			
 
				+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
			
 
				+    if new_v < 0.9 * v:
			
 
				+        new_v += divisor
			
 
				+    return new_v
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 filter_size,
			
 
				+                 num_filters,
			
 
				+                 stride,
			
 
				+                 num_groups=1):
			
 
				+        super().__init__()
			
 
				+
			
 
				+        self.conv = Conv2D(
			
 
				+            in_channels=num_channels,
			
 
				+            out_channels=num_filters,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=(filter_size - 1) // 2,
			
 
				+            groups=num_groups,
			
 
				+            weight_attr=ParamAttr(initializer=KaimingNormal()),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        self.bn = nn.BatchNorm2D(
			
 
				+            num_filters,
			
 
				+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
			
 
				+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
			
 
				+        self.hardswish = nn.Hardswish()
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv(x)
			
 
				+        x = self.bn(x)
			
 
				+        x = self.hardswish(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class DepthwiseSeparable(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_channels,
			
 
				+                 num_filters,
			
 
				+                 stride,
			
 
				+                 dw_size=3,
			
 
				+                 use_se=False):
			
 
				+        super().__init__()
			
 
				+        self.use_se = use_se
			
 
				+        self.dw_conv = ConvBNLayer(
			
 
				+            num_channels=num_channels,
			
 
				+            num_filters=num_channels,
			
 
				+            filter_size=dw_size,
			
 
				+            stride=stride,
			
 
				+            num_groups=num_channels)
			
 
				+        if use_se:
			
 
				+            self.se = SEModule(num_channels)
			
 
				+        self.pw_conv = ConvBNLayer(
			
 
				+            num_channels=num_channels,
			
 
				+            filter_size=1,
			
 
				+            num_filters=num_filters,
			
 
				+            stride=1)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.dw_conv(x)
			
 
				+        if self.use_se:
			
 
				+            x = self.se(x)
			
 
				+        x = self.pw_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class SEModule(nn.Layer):
			
 
				+    def __init__(self, channel, reduction=4):
			
 
				+        super().__init__()
			
 
				+        self.avg_pool = AdaptiveAvgPool2D(1)
			
 
				+        self.conv1 = Conv2D(
			
 
				+            in_channels=channel,
			
 
				+            out_channels=channel // reduction,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0)
			
 
				+        self.relu = nn.ReLU()
			
 
				+        self.conv2 = Conv2D(
			
 
				+            in_channels=channel // reduction,
			
 
				+            out_channels=channel,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0)
			
 
				+        self.hardsigmoid = nn.Hardsigmoid()
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        identity = x
			
 
				+        x = self.avg_pool(x)
			
 
				+        x = self.conv1(x)
			
 
				+        x = self.relu(x)
			
 
				+        x = self.conv2(x)
			
 
				+        x = self.hardsigmoid(x)
			
 
				+        x = paddle.multiply(x=identity, y=x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class LCNet(nn.Layer):
			
 
				+    def __init__(self, scale=1.0, feature_maps=[3, 4, 5]):
			
 
				+        super().__init__()
			
 
				+        self.scale = scale
			
 
				+        self.feature_maps = feature_maps
			
 
				+
			
 
				+        out_channels = []
			
 
				+
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            num_channels=3,
			
 
				+            filter_size=3,
			
 
				+            num_filters=make_divisible(16 * scale),
			
 
				+            stride=2)
			
 
				+
			
 
				+        self.blocks2 = nn.Sequential(*[
			
 
				+            DepthwiseSeparable(
			
 
				+                num_channels=make_divisible(in_c * scale),
			
 
				+                num_filters=make_divisible(out_c * scale),
			
 
				+                dw_size=k,
			
 
				+                stride=s,
			
 
				+                use_se=se)
			
 
				+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
			
 
				+        ])
			
 
				+
			
 
				+        self.blocks3 = nn.Sequential(*[
			
 
				+            DepthwiseSeparable(
			
 
				+                num_channels=make_divisible(in_c * scale),
			
 
				+                num_filters=make_divisible(out_c * scale),
			
 
				+                dw_size=k,
			
 
				+                stride=s,
			
 
				+                use_se=se)
			
 
				+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
			
 
				+        ])
			
 
				+
			
 
				+        out_channels.append(
			
 
				+            make_divisible(NET_CONFIG["blocks3"][-1][2] * scale))
			
 
				+
			
 
				+        self.blocks4 = nn.Sequential(*[
			
 
				+            DepthwiseSeparable(
			
 
				+                num_channels=make_divisible(in_c * scale),
			
 
				+                num_filters=make_divisible(out_c * scale),
			
 
				+                dw_size=k,
			
 
				+                stride=s,
			
 
				+                use_se=se)
			
 
				+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
			
 
				+        ])
			
 
				+
			
 
				+        out_channels.append(
			
 
				+            make_divisible(NET_CONFIG["blocks4"][-1][2] * scale))
			
 
				+
			
 
				+        self.blocks5 = nn.Sequential(*[
			
 
				+            DepthwiseSeparable(
			
 
				+                num_channels=make_divisible(in_c * scale),
			
 
				+                num_filters=make_divisible(out_c * scale),
			
 
				+                dw_size=k,
			
 
				+                stride=s,
			
 
				+                use_se=se)
			
 
				+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
			
 
				+        ])
			
 
				+
			
 
				+        out_channels.append(
			
 
				+            make_divisible(NET_CONFIG["blocks5"][-1][2] * scale))
			
 
				+
			
 
				+        self.blocks6 = nn.Sequential(*[
			
 
				+            DepthwiseSeparable(
			
 
				+                num_channels=make_divisible(in_c * scale),
			
 
				+                num_filters=make_divisible(out_c * scale),
			
 
				+                dw_size=k,
			
 
				+                stride=s,
			
 
				+                use_se=se)
			
 
				+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
			
 
				+        ])
			
 
				+
			
 
				+        out_channels.append(
			
 
				+            make_divisible(NET_CONFIG["blocks6"][-1][2] * scale))
			
 
				+        self._out_channels = [
			
 
				+            ch for idx, ch in enumerate(out_channels)
			
 
				+            if idx + 2 in feature_maps
			
 
				+        ]
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        outs = []
			
 
				+
			
 
				+        x = self.conv1(x)
			
 
				+        x = self.blocks2(x)
			
 
				+        x = self.blocks3(x)
			
 
				+        outs.append(x)
			
 
				+        x = self.blocks4(x)
			
 
				+        outs.append(x)
			
 
				+        x = self.blocks5(x)
			
 
				+        outs.append(x)
			
 
				+        x = self.blocks6(x)
			
 
				+        outs.append(x)
			
 
				+        outs = [o for i, o in enumerate(outs) if i + 2 in self.feature_maps]
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/lite_hrnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/lite_hrnet.py
@@ -0,0 +1,886 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+"""
			
 
				+This code is based on
			
 
				+https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
			
 
				+"""
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+
			
 
				+from numbers import Integral
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import Normal, Constant
			
 
				+from paddlers.models.ppdet.core.workspace import register
			
 
				+from paddlers.models.ppdet.modeling.shape_spec import ShapeSpec
			
 
				+from paddlers.models.ppdet.modeling.ops import channel_shuffle
			
 
				+from .. import layers as L
			
 
				+
			
 
				+__all__ = ['LiteHRNet']
			
 
				+
			
 
				+
			
 
				+class ConvNormLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size,
			
 
				+                 stride=1,
			
 
				+                 groups=1,
			
 
				+                 norm_type=None,
			
 
				+                 norm_groups=32,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 act=None):
			
 
				+        super(ConvNormLayer, self).__init__()
			
 
				+        self.act = act
			
 
				+        norm_lr = 0. if freeze_norm else 1.
			
 
				+        if norm_type is not None:
			
 
				+            assert norm_type in ['bn', 'sync_bn', 'gn'], \
			
 
				+                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
			
 
				+            param_attr = ParamAttr(
			
 
				+                initializer=Constant(1.0),
			
 
				+                learning_rate=norm_lr,
			
 
				+                regularizer=L2Decay(norm_decay), )
			
 
				+            bias_attr = ParamAttr(
			
 
				+                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
			
 
				+            global_stats = True if freeze_norm else None
			
 
				+            if norm_type in ['bn', 'sync_bn']:
			
 
				+                self.norm = nn.BatchNorm2D(
			
 
				+                    ch_out,
			
 
				+                    weight_attr=param_attr,
			
 
				+                    bias_attr=bias_attr,
			
 
				+                    use_global_stats=global_stats, )
			
 
				+            elif norm_type == 'gn':
			
 
				+                self.norm = nn.GroupNorm(
			
 
				+                    num_groups=norm_groups,
			
 
				+                    num_channels=ch_out,
			
 
				+                    weight_attr=param_attr,
			
 
				+                    bias_attr=bias_attr)
			
 
				+            norm_params = self.norm.parameters()
			
 
				+            if freeze_norm:
			
 
				+                for param in norm_params:
			
 
				+                    param.stop_gradient = True
			
 
				+            conv_bias_attr = False
			
 
				+        else:
			
 
				+            conv_bias_attr = True
			
 
				+            self.norm = None
			
 
				+
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels=ch_in,
			
 
				+            out_channels=ch_out,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=(filter_size - 1) // 2,
			
 
				+            groups=groups,
			
 
				+            weight_attr=ParamAttr(initializer=Normal(
			
 
				+                mean=0., std=0.001)),
			
 
				+            bias_attr=conv_bias_attr)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        out = self.conv(inputs)
			
 
				+        if self.norm is not None:
			
 
				+            out = self.norm(out)
			
 
				+
			
 
				+        if self.act == 'relu':
			
 
				+            out = F.relu(out)
			
 
				+        elif self.act == 'sigmoid':
			
 
				+            out = F.sigmoid(out)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class DepthWiseSeparableConvNormLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 filter_size,
			
 
				+                 stride=1,
			
 
				+                 dw_norm_type=None,
			
 
				+                 pw_norm_type=None,
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 dw_act=None,
			
 
				+                 pw_act=None):
			
 
				+        super(DepthWiseSeparableConvNormLayer, self).__init__()
			
 
				+        self.depthwise_conv = ConvNormLayer(
			
 
				+            ch_in=ch_in,
			
 
				+            ch_out=ch_in,
			
 
				+            filter_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            groups=ch_in,
			
 
				+            norm_type=dw_norm_type,
			
 
				+            act=dw_act,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm, )
			
 
				+        self.pointwise_conv = ConvNormLayer(
			
 
				+            ch_in=ch_in,
			
 
				+            ch_out=ch_out,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            norm_type=pw_norm_type,
			
 
				+            act=pw_act,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm, )
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.depthwise_conv(x)
			
 
				+        x = self.pointwise_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class CrossResolutionWeightingModule(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 channels,
			
 
				+                 ratio=16,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(CrossResolutionWeightingModule, self).__init__()
			
 
				+        self.channels = channels
			
 
				+        total_channel = sum(channels)
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            ch_in=total_channel,
			
 
				+            ch_out=total_channel // ratio,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            norm_type=norm_type,
			
 
				+            act='relu',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        self.conv2 = ConvNormLayer(
			
 
				+            ch_in=total_channel // ratio,
			
 
				+            ch_out=total_channel,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            norm_type=norm_type,
			
 
				+            act='sigmoid',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        mini_size = x[-1].shape[-2:]
			
 
				+        out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
			
 
				+        out = paddle.concat(out, 1)
			
 
				+        out = self.conv1(out)
			
 
				+        out = self.conv2(out)
			
 
				+        out = paddle.split(out, self.channels, 1)
			
 
				+        out = [
			
 
				+            s * F.interpolate(
			
 
				+                a, s.shape[-2:], mode='nearest') for s, a in zip(x, out)
			
 
				+        ]
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class SpatialWeightingModule(nn.Layer):
			
 
				+    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
			
 
				+        super(SpatialWeightingModule, self).__init__()
			
 
				+        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            ch_in=in_channel,
			
 
				+            ch_out=in_channel // ratio,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            act='relu',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        self.conv2 = ConvNormLayer(
			
 
				+            ch_in=in_channel // ratio,
			
 
				+            ch_out=in_channel,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            act='sigmoid',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        out = self.global_avgpooling(x)
			
 
				+        out = self.conv1(out)
			
 
				+        out = self.conv2(out)
			
 
				+        return x * out
			
 
				+
			
 
				+
			
 
				+class ConditionalChannelWeightingBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 stride,
			
 
				+                 reduce_ratio,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(ConditionalChannelWeightingBlock, self).__init__()
			
 
				+        assert stride in [1, 2]
			
 
				+        branch_channels = [channel // 2 for channel in in_channels]
			
 
				+
			
 
				+        self.cross_resolution_weighting = CrossResolutionWeightingModule(
			
 
				+            branch_channels,
			
 
				+            ratio=reduce_ratio,
			
 
				+            norm_type=norm_type,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        self.depthwise_convs = nn.LayerList([
			
 
				+            ConvNormLayer(
			
 
				+                channel,
			
 
				+                channel,
			
 
				+                filter_size=3,
			
 
				+                stride=stride,
			
 
				+                groups=channel,
			
 
				+                norm_type=norm_type,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay) for channel in branch_channels
			
 
				+        ])
			
 
				+
			
 
				+        self.spatial_weighting = nn.LayerList([
			
 
				+            SpatialWeightingModule(
			
 
				+                channel,
			
 
				+                ratio=4,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay) for channel in branch_channels
			
 
				+        ])
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = [s.chunk(2, axis=1) for s in x]
			
 
				+        x1 = [s[0] for s in x]
			
 
				+        x2 = [s[1] for s in x]
			
 
				+
			
 
				+        x2 = self.cross_resolution_weighting(x2)
			
 
				+        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
			
 
				+        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
			
 
				+
			
 
				+        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
			
 
				+        out = [channel_shuffle(s, groups=2) for s in out]
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class ShuffleUnit(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channel,
			
 
				+                 out_channel,
			
 
				+                 stride,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(ShuffleUnit, self).__init__()
			
 
				+        branch_channel = out_channel // 2
			
 
				+        self.stride = stride
			
 
				+        if self.stride == 1:
			
 
				+            assert in_channel == branch_channel * 2, \
			
 
				+                "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
			
 
				+        if stride > 1:
			
 
				+            self.branch1 = nn.Sequential(
			
 
				+                ConvNormLayer(
			
 
				+                    ch_in=in_channel,
			
 
				+                    ch_out=in_channel,
			
 
				+                    filter_size=3,
			
 
				+                    stride=self.stride,
			
 
				+                    groups=in_channel,
			
 
				+                    norm_type=norm_type,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    norm_decay=norm_decay),
			
 
				+                ConvNormLayer(
			
 
				+                    ch_in=in_channel,
			
 
				+                    ch_out=branch_channel,
			
 
				+                    filter_size=1,
			
 
				+                    stride=1,
			
 
				+                    norm_type=norm_type,
			
 
				+                    act='relu',
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    norm_decay=norm_decay), )
			
 
				+        self.branch2 = nn.Sequential(
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=branch_channel if stride == 1 else in_channel,
			
 
				+                ch_out=branch_channel,
			
 
				+                filter_size=1,
			
 
				+                stride=1,
			
 
				+                norm_type=norm_type,
			
 
				+                act='relu',
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay),
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=branch_channel,
			
 
				+                ch_out=branch_channel,
			
 
				+                filter_size=3,
			
 
				+                stride=self.stride,
			
 
				+                groups=branch_channel,
			
 
				+                norm_type=norm_type,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay),
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=branch_channel,
			
 
				+                ch_out=branch_channel,
			
 
				+                filter_size=1,
			
 
				+                stride=1,
			
 
				+                norm_type=norm_type,
			
 
				+                act='relu',
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay), )
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        if self.stride > 1:
			
 
				+            x1 = self.branch1(x)
			
 
				+            x2 = self.branch2(x)
			
 
				+        else:
			
 
				+            x1, x2 = x.chunk(2, axis=1)
			
 
				+            x2 = self.branch2(x2)
			
 
				+        out = paddle.concat([x1, x2], axis=1)
			
 
				+        out = channel_shuffle(out, groups=2)
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class IterativeHead(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(IterativeHead, self).__init__()
			
 
				+        num_branches = len(in_channels)
			
 
				+        self.in_channels = in_channels[::-1]
			
 
				+
			
 
				+        projects = []
			
 
				+        for i in range(num_branches):
			
 
				+            if i != num_branches - 1:
			
 
				+                projects.append(
			
 
				+                    DepthWiseSeparableConvNormLayer(
			
 
				+                        ch_in=self.in_channels[i],
			
 
				+                        ch_out=self.in_channels[i + 1],
			
 
				+                        filter_size=3,
			
 
				+                        stride=1,
			
 
				+                        dw_act=None,
			
 
				+                        pw_act='relu',
			
 
				+                        dw_norm_type=norm_type,
			
 
				+                        pw_norm_type=norm_type,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        norm_decay=norm_decay))
			
 
				+            else:
			
 
				+                projects.append(
			
 
				+                    DepthWiseSeparableConvNormLayer(
			
 
				+                        ch_in=self.in_channels[i],
			
 
				+                        ch_out=self.in_channels[i],
			
 
				+                        filter_size=3,
			
 
				+                        stride=1,
			
 
				+                        dw_act=None,
			
 
				+                        pw_act='relu',
			
 
				+                        dw_norm_type=norm_type,
			
 
				+                        pw_norm_type=norm_type,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        norm_decay=norm_decay))
			
 
				+        self.projects = nn.LayerList(projects)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = x[::-1]
			
 
				+        y = []
			
 
				+        last_x = None
			
 
				+        for i, s in enumerate(x):
			
 
				+            if last_x is not None:
			
 
				+                last_x = F.interpolate(
			
 
				+                    last_x,
			
 
				+                    size=s.shape[-2:],
			
 
				+                    mode='bilinear',
			
 
				+                    align_corners=True)
			
 
				+                s = s + last_x
			
 
				+            s = self.projects[i](s)
			
 
				+            y.append(s)
			
 
				+            last_x = s
			
 
				+
			
 
				+        return y[::-1]
			
 
				+
			
 
				+
			
 
				+class Stem(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channel,
			
 
				+                 stem_channel,
			
 
				+                 out_channel,
			
 
				+                 expand_ratio,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(Stem, self).__init__()
			
 
				+        self.conv1 = ConvNormLayer(
			
 
				+            in_channel,
			
 
				+            stem_channel,
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            norm_type=norm_type,
			
 
				+            act='relu',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        mid_channel = int(round(stem_channel * expand_ratio))
			
 
				+        branch_channel = stem_channel // 2
			
 
				+        if stem_channel == out_channel:
			
 
				+            inc_channel = out_channel - branch_channel
			
 
				+        else:
			
 
				+            inc_channel = out_channel - stem_channel
			
 
				+        self.branch1 = nn.Sequential(
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=branch_channel,
			
 
				+                ch_out=branch_channel,
			
 
				+                filter_size=3,
			
 
				+                stride=2,
			
 
				+                groups=branch_channel,
			
 
				+                norm_type=norm_type,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay),
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=branch_channel,
			
 
				+                ch_out=inc_channel,
			
 
				+                filter_size=1,
			
 
				+                stride=1,
			
 
				+                norm_type=norm_type,
			
 
				+                act='relu',
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay), )
			
 
				+        self.expand_conv = ConvNormLayer(
			
 
				+            ch_in=branch_channel,
			
 
				+            ch_out=mid_channel,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            norm_type=norm_type,
			
 
				+            act='relu',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        self.depthwise_conv = ConvNormLayer(
			
 
				+            ch_in=mid_channel,
			
 
				+            ch_out=mid_channel,
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            groups=mid_channel,
			
 
				+            norm_type=norm_type,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+        self.linear_conv = ConvNormLayer(
			
 
				+            ch_in=mid_channel,
			
 
				+            ch_out=branch_channel
			
 
				+            if stem_channel == out_channel else stem_channel,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            norm_type=norm_type,
			
 
				+            act='relu',
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            norm_decay=norm_decay)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv1(x)
			
 
				+        x1, x2 = x.chunk(2, axis=1)
			
 
				+        x1 = self.branch1(x1)
			
 
				+        x2 = self.expand_conv(x2)
			
 
				+        x2 = self.depthwise_conv(x2)
			
 
				+        x2 = self.linear_conv(x2)
			
 
				+        out = paddle.concat([x1, x2], axis=1)
			
 
				+        out = channel_shuffle(out, groups=2)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class LiteHRNetModule(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 num_branches,
			
 
				+                 num_blocks,
			
 
				+                 in_channels,
			
 
				+                 reduce_ratio,
			
 
				+                 module_type,
			
 
				+                 multiscale_output=False,
			
 
				+                 with_fuse=True,
			
 
				+                 norm_type='bn',
			
 
				+                 freeze_norm=False,
			
 
				+                 norm_decay=0.):
			
 
				+        super(LiteHRNetModule, self).__init__()
			
 
				+        assert num_branches == len(in_channels),\
			
 
				+            "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
			
 
				+        assert module_type in [
			
 
				+            'LITE', 'NAIVE'
			
 
				+        ], "module_type should be one of ['LITE', 'NAIVE']"
			
 
				+        self.num_branches = num_branches
			
 
				+        self.in_channels = in_channels
			
 
				+        self.multiscale_output = multiscale_output
			
 
				+        self.with_fuse = with_fuse
			
 
				+        self.norm_type = 'bn'
			
 
				+        self.module_type = module_type
			
 
				+
			
 
				+        if self.module_type == 'LITE':
			
 
				+            self.layers = self._make_weighting_blocks(
			
 
				+                num_blocks,
			
 
				+                reduce_ratio,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay)
			
 
				+        elif self.module_type == 'NAIVE':
			
 
				+            self.layers = self._make_naive_branches(
			
 
				+                num_branches,
			
 
				+                num_blocks,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                norm_decay=norm_decay)
			
 
				+
			
 
				+        if self.with_fuse:
			
 
				+            self.fuse_layers = self._make_fuse_layers(
			
 
				+                freeze_norm=freeze_norm, norm_decay=norm_decay)
			
 
				+            self.relu = nn.ReLU()
			
 
				+
			
 
				+    def _make_weighting_blocks(self,
			
 
				+                               num_blocks,
			
 
				+                               reduce_ratio,
			
 
				+                               stride=1,
			
 
				+                               freeze_norm=False,
			
 
				+                               norm_decay=0.):
			
 
				+        layers = []
			
 
				+        for i in range(num_blocks):
			
 
				+            layers.append(
			
 
				+                ConditionalChannelWeightingBlock(
			
 
				+                    self.in_channels,
			
 
				+                    stride=stride,
			
 
				+                    reduce_ratio=reduce_ratio,
			
 
				+                    norm_type=self.norm_type,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    norm_decay=norm_decay))
			
 
				+        return nn.Sequential(*layers)
			
 
				+
			
 
				+    def _make_naive_branches(self,
			
 
				+                             num_branches,
			
 
				+                             num_blocks,
			
 
				+                             freeze_norm=False,
			
 
				+                             norm_decay=0.):
			
 
				+        branches = []
			
 
				+        for branch_idx in range(num_branches):
			
 
				+            layers = []
			
 
				+            for i in range(num_blocks):
			
 
				+                layers.append(
			
 
				+                    ShuffleUnit(
			
 
				+                        self.in_channels[branch_idx],
			
 
				+                        self.in_channels[branch_idx],
			
 
				+                        stride=1,
			
 
				+                        norm_type=self.norm_type,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        norm_decay=norm_decay))
			
 
				+            branches.append(nn.Sequential(*layers))
			
 
				+        return nn.LayerList(branches)
			
 
				+
			
 
				+    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
			
 
				+        if self.num_branches == 1:
			
 
				+            return None
			
 
				+        fuse_layers = []
			
 
				+        num_out_branches = self.num_branches if self.multiscale_output else 1
			
 
				+        for i in range(num_out_branches):
			
 
				+            fuse_layer = []
			
 
				+            for j in range(self.num_branches):
			
 
				+                if j > i:
			
 
				+                    fuse_layer.append(
			
 
				+                        nn.Sequential(
			
 
				+                            L.Conv2d(
			
 
				+                                self.in_channels[j],
			
 
				+                                self.in_channels[i],
			
 
				+                                kernel_size=1,
			
 
				+                                stride=1,
			
 
				+                                padding=0,
			
 
				+                                bias=False, ),
			
 
				+                            nn.BatchNorm2D(self.in_channels[i]),
			
 
				+                            nn.Upsample(
			
 
				+                                scale_factor=2**(j - i), mode='nearest')))
			
 
				+                elif j == i:
			
 
				+                    fuse_layer.append(None)
			
 
				+                else:
			
 
				+                    conv_downsamples = []
			
 
				+                    for k in range(i - j):
			
 
				+                        if k == i - j - 1:
			
 
				+                            conv_downsamples.append(
			
 
				+                                nn.Sequential(
			
 
				+                                    L.Conv2d(
			
 
				+                                        self.in_channels[j],
			
 
				+                                        self.in_channels[j],
			
 
				+                                        kernel_size=3,
			
 
				+                                        stride=2,
			
 
				+                                        padding=1,
			
 
				+                                        groups=self.in_channels[j],
			
 
				+                                        bias=False, ),
			
 
				+                                    nn.BatchNorm2D(self.in_channels[j]),
			
 
				+                                    L.Conv2d(
			
 
				+                                        self.in_channels[j],
			
 
				+                                        self.in_channels[i],
			
 
				+                                        kernel_size=1,
			
 
				+                                        stride=1,
			
 
				+                                        padding=0,
			
 
				+                                        bias=False, ),
			
 
				+                                    nn.BatchNorm2D(self.in_channels[i])))
			
 
				+                        else:
			
 
				+                            conv_downsamples.append(
			
 
				+                                nn.Sequential(
			
 
				+                                    L.Conv2d(
			
 
				+                                        self.in_channels[j],
			
 
				+                                        self.in_channels[j],
			
 
				+                                        kernel_size=3,
			
 
				+                                        stride=2,
			
 
				+                                        padding=1,
			
 
				+                                        groups=self.in_channels[j],
			
 
				+                                        bias=False, ),
			
 
				+                                    nn.BatchNorm2D(self.in_channels[j]),
			
 
				+                                    L.Conv2d(
			
 
				+                                        self.in_channels[j],
			
 
				+                                        self.in_channels[j],
			
 
				+                                        kernel_size=1,
			
 
				+                                        stride=1,
			
 
				+                                        padding=0,
			
 
				+                                        bias=False, ),
			
 
				+                                    nn.BatchNorm2D(self.in_channels[j]),
			
 
				+                                    nn.ReLU()))
			
 
				+
			
 
				+                    fuse_layer.append(nn.Sequential(*conv_downsamples))
			
 
				+            fuse_layers.append(nn.LayerList(fuse_layer))
			
 
				+
			
 
				+        return nn.LayerList(fuse_layers)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        if self.num_branches == 1:
			
 
				+            return [self.layers[0](x[0])]
			
 
				+        if self.module_type == 'LITE':
			
 
				+            out = self.layers(x)
			
 
				+        elif self.module_type == 'NAIVE':
			
 
				+            for i in range(self.num_branches):
			
 
				+                x[i] = self.layers[i](x[i])
			
 
				+            out = x
			
 
				+        if self.with_fuse:
			
 
				+            out_fuse = []
			
 
				+            for i in range(len(self.fuse_layers)):
			
 
				+                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
			
 
				+                for j in range(self.num_branches):
			
 
				+                    if j == 0:
			
 
				+                        y += y
			
 
				+                    elif i == j:
			
 
				+                        y += out[j]
			
 
				+                    else:
			
 
				+                        y += self.fuse_layers[i][j](out[j])
			
 
				+                    if i == 0:
			
 
				+                        out[i] = y
			
 
				+                out_fuse.append(self.relu(y))
			
 
				+            out = out_fuse
			
 
				+        elif not self.multiscale_output:
			
 
				+            out = [out[0]]
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class LiteHRNet(nn.Layer):
			
 
				+    """
			
 
				+    @inproceedings{Yulitehrnet21,
			
 
				+    title={Lite-HRNet: A Lightweight High-Resolution Network},
			
 
				+        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
			
 
				+        booktitle={CVPR},year={2021}
			
 
				+    }
			
 
				+    Args:
			
 
				+        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
			
 
				+            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
			
 
				+            "wider_naive": Naive network with wider channels in each block.
			
 
				+            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
			
 
				+            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
			
 
				+        freeze_at (int): the stage to freeze
			
 
				+        freeze_norm (bool): whether to freeze norm in HRNet
			
 
				+        norm_decay (float): weight decay for normalization layer weights
			
 
				+        return_idx (List): the stage to return
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 network_type,
			
 
				+                 freeze_at=0,
			
 
				+                 freeze_norm=True,
			
 
				+                 norm_decay=0.,
			
 
				+                 return_idx=[0, 1, 2, 3]):
			
 
				+        super(LiteHRNet, self).__init__()
			
 
				+        if isinstance(return_idx, Integral):
			
 
				+            return_idx = [return_idx]
			
 
				+        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
			
 
				+            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
			
 
				+        assert len(return_idx) > 0, "need one or more return index"
			
 
				+        self.freeze_at = freeze_at
			
 
				+        self.freeze_norm = freeze_norm
			
 
				+        self.norm_decay = norm_decay
			
 
				+        self.return_idx = return_idx
			
 
				+        self.norm_type = 'bn'
			
 
				+
			
 
				+        self.module_configs = {
			
 
				+            "lite_18": {
			
 
				+                "num_modules": [2, 4, 2],
			
 
				+                "num_branches": [2, 3, 4],
			
 
				+                "num_blocks": [2, 2, 2],
			
 
				+                "module_type": ["LITE", "LITE", "LITE"],
			
 
				+                "reduce_ratios": [8, 8, 8],
			
 
				+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
			
 
				+            },
			
 
				+            "lite_30": {
			
 
				+                "num_modules": [3, 8, 3],
			
 
				+                "num_branches": [2, 3, 4],
			
 
				+                "num_blocks": [2, 2, 2],
			
 
				+                "module_type": ["LITE", "LITE", "LITE"],
			
 
				+                "reduce_ratios": [8, 8, 8],
			
 
				+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
			
 
				+            },
			
 
				+            "naive": {
			
 
				+                "num_modules": [2, 4, 2],
			
 
				+                "num_branches": [2, 3, 4],
			
 
				+                "num_blocks": [2, 2, 2],
			
 
				+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
			
 
				+                "reduce_ratios": [1, 1, 1],
			
 
				+                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
			
 
				+            },
			
 
				+            "wider_naive": {
			
 
				+                "num_modules": [2, 4, 2],
			
 
				+                "num_branches": [2, 3, 4],
			
 
				+                "num_blocks": [2, 2, 2],
			
 
				+                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
			
 
				+                "reduce_ratios": [1, 1, 1],
			
 
				+                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
			
 
				+            },
			
 
				+        }
			
 
				+
			
 
				+        self.stages_config = self.module_configs[network_type]
			
 
				+
			
 
				+        self.stem = Stem(3, 32, 32, 1)
			
 
				+        num_channels_pre_layer = [32]
			
 
				+        for stage_idx in range(3):
			
 
				+            num_channels = self.stages_config["num_channels"][stage_idx]
			
 
				+            setattr(self, 'transition{}'.format(stage_idx),
			
 
				+                    self._make_transition_layer(num_channels_pre_layer,
			
 
				+                                                num_channels, self.freeze_norm,
			
 
				+                                                self.norm_decay))
			
 
				+            stage, num_channels_pre_layer = self._make_stage(
			
 
				+                self.stages_config, stage_idx, num_channels, True,
			
 
				+                self.freeze_norm, self.norm_decay)
			
 
				+            setattr(self, 'stage{}'.format(stage_idx), stage)
			
 
				+        self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
			
 
				+                                        self.freeze_norm, self.norm_decay)
			
 
				+
			
 
				+    def _make_transition_layer(self,
			
 
				+                               num_channels_pre_layer,
			
 
				+                               num_channels_cur_layer,
			
 
				+                               freeze_norm=False,
			
 
				+                               norm_decay=0.):
			
 
				+        num_branches_pre = len(num_channels_pre_layer)
			
 
				+        num_branches_cur = len(num_channels_cur_layer)
			
 
				+        transition_layers = []
			
 
				+        for i in range(num_branches_cur):
			
 
				+            if i < num_branches_pre:
			
 
				+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
			
 
				+                    transition_layers.append(
			
 
				+                        nn.Sequential(
			
 
				+                            L.Conv2d(
			
 
				+                                num_channels_pre_layer[i],
			
 
				+                                num_channels_pre_layer[i],
			
 
				+                                kernel_size=3,
			
 
				+                                stride=1,
			
 
				+                                padding=1,
			
 
				+                                groups=num_channels_pre_layer[i],
			
 
				+                                bias=False),
			
 
				+                            nn.BatchNorm2D(num_channels_pre_layer[i]),
			
 
				+                            L.Conv2d(
			
 
				+                                num_channels_pre_layer[i],
			
 
				+                                num_channels_cur_layer[i],
			
 
				+                                kernel_size=1,
			
 
				+                                stride=1,
			
 
				+                                padding=0,
			
 
				+                                bias=False, ),
			
 
				+                            nn.BatchNorm2D(num_channels_cur_layer[i]),
			
 
				+                            nn.ReLU()))
			
 
				+                else:
			
 
				+                    transition_layers.append(None)
			
 
				+            else:
			
 
				+                conv_downsamples = []
			
 
				+                for j in range(i + 1 - num_branches_pre):
			
 
				+                    conv_downsamples.append(
			
 
				+                        nn.Sequential(
			
 
				+                            L.Conv2d(
			
 
				+                                num_channels_pre_layer[-1],
			
 
				+                                num_channels_pre_layer[-1],
			
 
				+                                groups=num_channels_pre_layer[-1],
			
 
				+                                kernel_size=3,
			
 
				+                                stride=2,
			
 
				+                                padding=1,
			
 
				+                                bias=False, ),
			
 
				+                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
			
 
				+                            L.Conv2d(
			
 
				+                                num_channels_pre_layer[-1],
			
 
				+                                num_channels_cur_layer[i]
			
 
				+                                if j == i - num_branches_pre else
			
 
				+                                num_channels_pre_layer[-1],
			
 
				+                                kernel_size=1,
			
 
				+                                stride=1,
			
 
				+                                padding=0,
			
 
				+                                bias=False, ),
			
 
				+                            nn.BatchNorm2D(num_channels_cur_layer[i]
			
 
				+                                           if j == i - num_branches_pre else
			
 
				+                                           num_channels_pre_layer[-1]),
			
 
				+                            nn.ReLU()))
			
 
				+                transition_layers.append(nn.Sequential(*conv_downsamples))
			
 
				+        return nn.LayerList(transition_layers)
			
 
				+
			
 
				+    def _make_stage(self,
			
 
				+                    stages_config,
			
 
				+                    stage_idx,
			
 
				+                    in_channels,
			
 
				+                    multiscale_output,
			
 
				+                    freeze_norm=False,
			
 
				+                    norm_decay=0.):
			
 
				+        num_modules = stages_config["num_modules"][stage_idx]
			
 
				+        num_branches = stages_config["num_branches"][stage_idx]
			
 
				+        num_blocks = stages_config["num_blocks"][stage_idx]
			
 
				+        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
			
 
				+        module_type = stages_config['module_type'][stage_idx]
			
 
				+
			
 
				+        modules = []
			
 
				+        for i in range(num_modules):
			
 
				+            if not multiscale_output and i == num_modules - 1:
			
 
				+                reset_multiscale_output = False
			
 
				+            else:
			
 
				+                reset_multiscale_output = True
			
 
				+            modules.append(
			
 
				+                LiteHRNetModule(
			
 
				+                    num_branches,
			
 
				+                    num_blocks,
			
 
				+                    in_channels,
			
 
				+                    reduce_ratio,
			
 
				+                    module_type,
			
 
				+                    multiscale_output=reset_multiscale_output,
			
 
				+                    with_fuse=True,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    norm_decay=norm_decay))
			
 
				+            in_channels = modules[-1].in_channels
			
 
				+        return nn.Sequential(*modules), in_channels
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        x = self.stem(x)
			
 
				+        y_list = [x]
			
 
				+        for stage_idx in range(3):
			
 
				+            x_list = []
			
 
				+            transition = getattr(self, 'transition{}'.format(stage_idx))
			
 
				+            for j in range(self.stages_config["num_branches"][stage_idx]):
			
 
				+                if transition[j] is not None:
			
 
				+                    if j >= len(y_list):
			
 
				+                        x_list.append(transition[j](y_list[-1]))
			
 
				+                    else:
			
 
				+                        x_list.append(transition[j](y_list[j]))
			
 
				+                else:
			
 
				+                    x_list.append(y_list[j])
			
 
				+            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
			
 
				+        x = self.head_layer(y_list)
			
 
				+        res = []
			
 
				+        for i, layer in enumerate(x):
			
 
				+            if i == self.freeze_at:
			
 
				+                layer.stop_gradient = True
			
 
				+            if i in self.return_idx:
			
 
				+                res.append(layer)
			
 
				+        return res
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [
			
 
				+            ShapeSpec(
			
 
				+                channels=self._out_channels[i], stride=self._out_strides[i])
			
 
				+            for i in self.return_idx
			
 
				+        ]
			
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v1.py
@@ -0,0 +1,411 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import KaimingNormal
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['MobileNet']
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 kernel_size,
			
 
				+                 stride,
			
 
				+                 padding,
			
 
				+                 num_groups=1,
			
 
				+                 act='relu',
			
 
				+                 conv_lr=1.,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 name=None):
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+        self.act = act
			
 
				+        self._conv = nn.Conv2D(
			
 
				+            in_channels,
			
 
				+            out_channels,
			
 
				+            kernel_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=num_groups,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=conv_lr,
			
 
				+                initializer=KaimingNormal(),
			
 
				+                regularizer=L2Decay(conv_decay)),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
			
 
				+        bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
			
 
				+        if norm_type in ['sync_bn', 'bn']:
			
 
				+            self._batch_norm = nn.BatchNorm2D(
			
 
				+                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self._conv(x)
			
 
				+        x = self._batch_norm(x)
			
 
				+        if self.act == "relu":
			
 
				+            x = F.relu(x)
			
 
				+        elif self.act == "relu6":
			
 
				+            x = F.relu6(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class DepthwiseSeparable(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels1,
			
 
				+                 out_channels2,
			
 
				+                 num_groups,
			
 
				+                 stride,
			
 
				+                 scale,
			
 
				+                 conv_lr=1.,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 name=None):
			
 
				+        super(DepthwiseSeparable, self).__init__()
			
 
				+
			
 
				+        self._depthwise_conv = ConvBNLayer(
			
 
				+            in_channels,
			
 
				+            int(out_channels1 * scale),
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            num_groups=int(num_groups * scale),
			
 
				+            conv_lr=conv_lr,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_decay=norm_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            name=name + "_dw")
			
 
				+
			
 
				+        self._pointwise_conv = ConvBNLayer(
			
 
				+            int(out_channels1 * scale),
			
 
				+            int(out_channels2 * scale),
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            conv_lr=conv_lr,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_decay=norm_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            name=name + "_sep")
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self._depthwise_conv(x)
			
 
				+        x = self._pointwise_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class ExtraBlock(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels1,
			
 
				+                 out_channels2,
			
 
				+                 num_groups=1,
			
 
				+                 stride=2,
			
 
				+                 conv_lr=1.,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 name=None):
			
 
				+        super(ExtraBlock, self).__init__()
			
 
				+
			
 
				+        self.pointwise_conv = ConvBNLayer(
			
 
				+            in_channels,
			
 
				+            int(out_channels1),
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            num_groups=int(num_groups),
			
 
				+            act='relu6',
			
 
				+            conv_lr=conv_lr,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_decay=norm_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            name=name + "_extra1")
			
 
				+
			
 
				+        self.normal_conv = ConvBNLayer(
			
 
				+            int(out_channels1),
			
 
				+            int(out_channels2),
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            num_groups=int(num_groups),
			
 
				+            act='relu6',
			
 
				+            conv_lr=conv_lr,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_decay=norm_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            name=name + "_extra2")
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.pointwise_conv(x)
			
 
				+        x = self.normal_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class MobileNet(nn.Layer):
			
 
				+    __shared__ = ['norm_type']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 conv_decay=0.,
			
 
				+                 scale=1,
			
 
				+                 conv_learning_rate=1.0,
			
 
				+                 feature_maps=[4, 6, 13],
			
 
				+                 with_extra_blocks=False,
			
 
				+                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
			
 
				+                                      [64, 128]]):
			
 
				+        super(MobileNet, self).__init__()
			
 
				+        if isinstance(feature_maps, Integral):
			
 
				+            feature_maps = [feature_maps]
			
 
				+        self.feature_maps = feature_maps
			
 
				+        self.with_extra_blocks = with_extra_blocks
			
 
				+        self.extra_block_filters = extra_block_filters
			
 
				+
			
 
				+        self._out_channels = []
			
 
				+
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            in_channels=3,
			
 
				+            out_channels=int(32 * scale),
			
 
				+            kernel_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            conv_lr=conv_learning_rate,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_decay=norm_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            name="conv1")
			
 
				+
			
 
				+        self.dwsl = []
			
 
				+        dws21 = self.add_sublayer(
			
 
				+            "conv2_1",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(32 * scale),
			
 
				+                out_channels1=32,
			
 
				+                out_channels2=64,
			
 
				+                num_groups=32,
			
 
				+                stride=1,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv2_1"))
			
 
				+        self.dwsl.append(dws21)
			
 
				+        self._update_out_channels(
			
 
				+            int(64 * scale), len(self.dwsl), feature_maps)
			
 
				+        dws22 = self.add_sublayer(
			
 
				+            "conv2_2",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(64 * scale),
			
 
				+                out_channels1=64,
			
 
				+                out_channels2=128,
			
 
				+                num_groups=64,
			
 
				+                stride=2,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv2_2"))
			
 
				+        self.dwsl.append(dws22)
			
 
				+        self._update_out_channels(
			
 
				+            int(128 * scale), len(self.dwsl), feature_maps)
			
 
				+        # 1/4
			
 
				+        dws31 = self.add_sublayer(
			
 
				+            "conv3_1",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(128 * scale),
			
 
				+                out_channels1=128,
			
 
				+                out_channels2=128,
			
 
				+                num_groups=128,
			
 
				+                stride=1,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv3_1"))
			
 
				+        self.dwsl.append(dws31)
			
 
				+        self._update_out_channels(
			
 
				+            int(128 * scale), len(self.dwsl), feature_maps)
			
 
				+        dws32 = self.add_sublayer(
			
 
				+            "conv3_2",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(128 * scale),
			
 
				+                out_channels1=128,
			
 
				+                out_channels2=256,
			
 
				+                num_groups=128,
			
 
				+                stride=2,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv3_2"))
			
 
				+        self.dwsl.append(dws32)
			
 
				+        self._update_out_channels(
			
 
				+            int(256 * scale), len(self.dwsl), feature_maps)
			
 
				+        # 1/8
			
 
				+        dws41 = self.add_sublayer(
			
 
				+            "conv4_1",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(256 * scale),
			
 
				+                out_channels1=256,
			
 
				+                out_channels2=256,
			
 
				+                num_groups=256,
			
 
				+                stride=1,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv4_1"))
			
 
				+        self.dwsl.append(dws41)
			
 
				+        self._update_out_channels(
			
 
				+            int(256 * scale), len(self.dwsl), feature_maps)
			
 
				+        dws42 = self.add_sublayer(
			
 
				+            "conv4_2",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(256 * scale),
			
 
				+                out_channels1=256,
			
 
				+                out_channels2=512,
			
 
				+                num_groups=256,
			
 
				+                stride=2,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv4_2"))
			
 
				+        self.dwsl.append(dws42)
			
 
				+        self._update_out_channels(
			
 
				+            int(512 * scale), len(self.dwsl), feature_maps)
			
 
				+        # 1/16
			
 
				+        for i in range(5):
			
 
				+            tmp = self.add_sublayer(
			
 
				+                "conv5_" + str(i + 1),
			
 
				+                sublayer=DepthwiseSeparable(
			
 
				+                    in_channels=int(512 * scale),
			
 
				+                    out_channels1=512,
			
 
				+                    out_channels2=512,
			
 
				+                    num_groups=512,
			
 
				+                    stride=1,
			
 
				+                    scale=scale,
			
 
				+                    conv_lr=conv_learning_rate,
			
 
				+                    conv_decay=conv_decay,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    norm_type=norm_type,
			
 
				+                    name="conv5_" + str(i + 1)))
			
 
				+            self.dwsl.append(tmp)
			
 
				+            self._update_out_channels(
			
 
				+                int(512 * scale), len(self.dwsl), feature_maps)
			
 
				+        dws56 = self.add_sublayer(
			
 
				+            "conv5_6",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(512 * scale),
			
 
				+                out_channels1=512,
			
 
				+                out_channels2=1024,
			
 
				+                num_groups=512,
			
 
				+                stride=2,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv5_6"))
			
 
				+        self.dwsl.append(dws56)
			
 
				+        self._update_out_channels(
			
 
				+            int(1024 * scale), len(self.dwsl), feature_maps)
			
 
				+        # 1/32
			
 
				+        dws6 = self.add_sublayer(
			
 
				+            "conv6",
			
 
				+            sublayer=DepthwiseSeparable(
			
 
				+                in_channels=int(1024 * scale),
			
 
				+                out_channels1=1024,
			
 
				+                out_channels2=1024,
			
 
				+                num_groups=1024,
			
 
				+                stride=1,
			
 
				+                scale=scale,
			
 
				+                conv_lr=conv_learning_rate,
			
 
				+                conv_decay=conv_decay,
			
 
				+                norm_decay=norm_decay,
			
 
				+                norm_type=norm_type,
			
 
				+                name="conv6"))
			
 
				+        self.dwsl.append(dws6)
			
 
				+        self._update_out_channels(
			
 
				+            int(1024 * scale), len(self.dwsl), feature_maps)
			
 
				+
			
 
				+        if self.with_extra_blocks:
			
 
				+            self.extra_blocks = []
			
 
				+            for i, block_filter in enumerate(self.extra_block_filters):
			
 
				+                in_c = 1024 if i == 0 else self.extra_block_filters[i - 1][1]
			
 
				+                conv_extra = self.add_sublayer(
			
 
				+                    "conv7_" + str(i + 1),
			
 
				+                    sublayer=ExtraBlock(
			
 
				+                        in_c,
			
 
				+                        block_filter[0],
			
 
				+                        block_filter[1],
			
 
				+                        conv_lr=conv_learning_rate,
			
 
				+                        conv_decay=conv_decay,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        norm_type=norm_type,
			
 
				+                        name="conv7_" + str(i + 1)))
			
 
				+                self.extra_blocks.append(conv_extra)
			
 
				+                self._update_out_channels(
			
 
				+                    block_filter[1],
			
 
				+                    len(self.dwsl) + len(self.extra_blocks), feature_maps)
			
 
				+
			
 
				+    def _update_out_channels(self, channel, feature_idx, feature_maps):
			
 
				+        if feature_idx in feature_maps:
			
 
				+            self._out_channels.append(channel)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outs = []
			
 
				+        y = self.conv1(inputs['image'])
			
 
				+        for i, block in enumerate(self.dwsl):
			
 
				+            y = block(y)
			
 
				+            if i + 1 in self.feature_maps:
			
 
				+                outs.append(y)
			
 
				+
			
 
				+        if not self.with_extra_blocks:
			
 
				+            return outs
			
 
				+
			
 
				+        y = outs[-1]
			
 
				+        for i, block in enumerate(self.extra_blocks):
			
 
				+            idx = i + len(self.dwsl)
			
 
				+            y = block(y)
			
 
				+            if idx + 1 in self.feature_maps:
			
 
				+                outs.append(y)
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
+++ b/paddlers/models/ppdet/modeling/backbones/mobilenet_v3.py
@@ -0,0 +1,479 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['MobileNetV3']
			
 
				+
			
 
				+
			
 
				+def make_divisible(v, divisor=8, min_value=None):
			
 
				+    if min_value is None:
			
 
				+        min_value = divisor
			
 
				+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
			
 
				+    if new_v < 0.9 * v:
			
 
				+        new_v += divisor
			
 
				+    return new_v
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_c,
			
 
				+                 out_c,
			
 
				+                 filter_size,
			
 
				+                 stride,
			
 
				+                 padding,
			
 
				+                 num_groups=1,
			
 
				+                 act=None,
			
 
				+                 lr_mult=1.,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 name=""):
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+        self.act = act
			
 
				+        self.conv = nn.Conv2D(
			
 
				+            in_channels=in_c,
			
 
				+            out_channels=out_c,
			
 
				+            kernel_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=num_groups,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        norm_lr = 0. if freeze_norm else lr_mult
			
 
				+        param_attr = ParamAttr(
			
 
				+            learning_rate=norm_lr,
			
 
				+            regularizer=L2Decay(norm_decay),
			
 
				+            trainable=False if freeze_norm else True)
			
 
				+        bias_attr = ParamAttr(
			
 
				+            learning_rate=norm_lr,
			
 
				+            regularizer=L2Decay(norm_decay),
			
 
				+            trainable=False if freeze_norm else True)
			
 
				+        global_stats = True if freeze_norm else None
			
 
				+        if norm_type in ['sync_bn', 'bn']:
			
 
				+            self.bn = nn.BatchNorm2D(
			
 
				+                out_c,
			
 
				+                weight_attr=param_attr,
			
 
				+                bias_attr=bias_attr,
			
 
				+                use_global_stats=global_stats)
			
 
				+        norm_params = self.bn.parameters()
			
 
				+        if freeze_norm:
			
 
				+            for param in norm_params:
			
 
				+                param.stop_gradient = True
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        x = self.conv(x)
			
 
				+        x = self.bn(x)
			
 
				+        if self.act is not None:
			
 
				+            if self.act == "relu":
			
 
				+                x = F.relu(x)
			
 
				+            elif self.act == "relu6":
			
 
				+                x = F.relu6(x)
			
 
				+            elif self.act == "hard_swish":
			
 
				+                x = F.hardswish(x)
			
 
				+            else:
			
 
				+                raise NotImplementedError(
			
 
				+                    "The activation function is selected incorrectly.")
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+class ResidualUnit(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_c,
			
 
				+                 mid_c,
			
 
				+                 out_c,
			
 
				+                 filter_size,
			
 
				+                 stride,
			
 
				+                 use_se,
			
 
				+                 lr_mult,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 act=None,
			
 
				+                 return_list=False,
			
 
				+                 name=''):
			
 
				+        super(ResidualUnit, self).__init__()
			
 
				+        self.if_shortcut = stride == 1 and in_c == out_c
			
 
				+        self.use_se = use_se
			
 
				+        self.return_list = return_list
			
 
				+
			
 
				+        self.expand_conv = ConvBNLayer(
			
 
				+            in_c=in_c,
			
 
				+            out_c=mid_c,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            act=act,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_expand")
			
 
				+        self.bottleneck_conv = ConvBNLayer(
			
 
				+            in_c=mid_c,
			
 
				+            out_c=mid_c,
			
 
				+            filter_size=filter_size,
			
 
				+            stride=stride,
			
 
				+            padding=int((filter_size - 1) // 2),
			
 
				+            num_groups=mid_c,
			
 
				+            act=act,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_depthwise")
			
 
				+        if self.use_se:
			
 
				+            self.mid_se = SEModule(
			
 
				+                mid_c, lr_mult, conv_decay, name=name + "_se")
			
 
				+        self.linear_conv = ConvBNLayer(
			
 
				+            in_c=mid_c,
			
 
				+            out_c=out_c,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            act=None,
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_linear")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self.expand_conv(inputs)
			
 
				+        x = self.bottleneck_conv(y)
			
 
				+        if self.use_se:
			
 
				+            x = self.mid_se(x)
			
 
				+        x = self.linear_conv(x)
			
 
				+        if self.if_shortcut:
			
 
				+            x = paddle.add(inputs, x)
			
 
				+        if self.return_list:
			
 
				+            return [y, x]
			
 
				+        else:
			
 
				+            return x
			
 
				+
			
 
				+
			
 
				+class SEModule(nn.Layer):
			
 
				+    def __init__(self, channel, lr_mult, conv_decay, reduction=4, name=""):
			
 
				+        super(SEModule, self).__init__()
			
 
				+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
			
 
				+        mid_channels = int(channel // reduction)
			
 
				+        self.conv1 = nn.Conv2D(
			
 
				+            in_channels=channel,
			
 
				+            out_channels=mid_channels,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
			
 
				+            bias_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
			
 
				+        self.conv2 = nn.Conv2D(
			
 
				+            in_channels=mid_channels,
			
 
				+            out_channels=channel,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            weight_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
			
 
				+            bias_attr=ParamAttr(
			
 
				+                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        outputs = self.avg_pool(inputs)
			
 
				+        outputs = self.conv1(outputs)
			
 
				+        outputs = F.relu(outputs)
			
 
				+        outputs = self.conv2(outputs)
			
 
				+        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
			
 
				+        return paddle.multiply(x=inputs, y=outputs)
			
 
				+
			
 
				+
			
 
				+class ExtraBlockDW(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_c,
			
 
				+                 ch_1,
			
 
				+                 ch_2,
			
 
				+                 stride,
			
 
				+                 lr_mult,
			
 
				+                 conv_decay=0.,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=False,
			
 
				+                 name=None):
			
 
				+        super(ExtraBlockDW, self).__init__()
			
 
				+        self.pointwise_conv = ConvBNLayer(
			
 
				+            in_c=in_c,
			
 
				+            out_c=ch_1,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding='SAME',
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra1")
			
 
				+        self.depthwise_conv = ConvBNLayer(
			
 
				+            in_c=ch_1,
			
 
				+            out_c=ch_2,
			
 
				+            filter_size=3,
			
 
				+            stride=stride,
			
 
				+            padding='SAME',
			
 
				+            num_groups=int(ch_1),
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra2_dw")
			
 
				+        self.normal_conv = ConvBNLayer(
			
 
				+            in_c=ch_2,
			
 
				+            out_c=ch_2,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            padding='SAME',
			
 
				+            act='relu6',
			
 
				+            lr_mult=lr_mult,
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name=name + "_extra2_sep")
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = self.pointwise_conv(inputs)
			
 
				+        x = self.depthwise_conv(x)
			
 
				+        x = self.normal_conv(x)
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class MobileNetV3(nn.Layer):
			
 
				+    __shared__ = ['norm_type']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 scale=1.0,
			
 
				+                 model_name="large",
			
 
				+                 feature_maps=[6, 12, 15],
			
 
				+                 with_extra_blocks=False,
			
 
				+                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
			
 
				+                                      [64, 128]],
			
 
				+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
			
 
				+                 conv_decay=0.0,
			
 
				+                 multiplier=1.0,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.0,
			
 
				+                 freeze_norm=False):
			
 
				+        super(MobileNetV3, self).__init__()
			
 
				+        if isinstance(feature_maps, Integral):
			
 
				+            feature_maps = [feature_maps]
			
 
				+        if norm_type == 'sync_bn' and freeze_norm:
			
 
				+            raise ValueError(
			
 
				+                "The norm_type should not be sync_bn when freeze_norm is True")
			
 
				+        self.feature_maps = feature_maps
			
 
				+        self.with_extra_blocks = with_extra_blocks
			
 
				+        self.extra_block_filters = extra_block_filters
			
 
				+
			
 
				+        inplanes = 16
			
 
				+        if model_name == "large":
			
 
				+            self.cfg = [
			
 
				+                # k, exp, c,  se,     nl,  s,
			
 
				+                [3, 16, 16, False, "relu", 1],
			
 
				+                [3, 64, 24, False, "relu", 2],
			
 
				+                [3, 72, 24, False, "relu", 1],
			
 
				+                [5, 72, 40, True, "relu", 2],  # RCNN output
			
 
				+                [5, 120, 40, True, "relu", 1],
			
 
				+                [5, 120, 40, True, "relu", 1],  # YOLOv3 output
			
 
				+                [3, 240, 80, False, "hard_swish", 2],  # RCNN output
			
 
				+                [3, 200, 80, False, "hard_swish", 1],
			
 
				+                [3, 184, 80, False, "hard_swish", 1],
			
 
				+                [3, 184, 80, False, "hard_swish", 1],
			
 
				+                [3, 480, 112, True, "hard_swish", 1],
			
 
				+                [3, 672, 112, True, "hard_swish", 1],  # YOLOv3 output
			
 
				+                [5, 672, 160, True, "hard_swish",
			
 
				+                 2],  # SSD/SSDLite/RCNN output
			
 
				+                [5, 960, 160, True, "hard_swish", 1],
			
 
				+                [5, 960, 160, True, "hard_swish", 1],  # YOLOv3 output
			
 
				+            ]
			
 
				+        elif model_name == "small":
			
 
				+            self.cfg = [
			
 
				+                # k, exp, c,  se,     nl,  s,
			
 
				+                [3, 16, 16, True, "relu", 2],
			
 
				+                [3, 72, 24, False, "relu", 2],  # RCNN output
			
 
				+                [3, 88, 24, False, "relu", 1],  # YOLOv3 output
			
 
				+                [5, 96, 40, True, "hard_swish", 2],  # RCNN output
			
 
				+                [5, 240, 40, True, "hard_swish", 1],
			
 
				+                [5, 240, 40, True, "hard_swish", 1],
			
 
				+                [5, 120, 48, True, "hard_swish", 1],
			
 
				+                [5, 144, 48, True, "hard_swish", 1],  # YOLOv3 output
			
 
				+                [5, 288, 96, True, "hard_swish", 2],  # SSD/SSDLite/RCNN output
			
 
				+                [5, 576, 96, True, "hard_swish", 1],
			
 
				+                [5, 576, 96, True, "hard_swish", 1],  # YOLOv3 output
			
 
				+            ]
			
 
				+        else:
			
 
				+            raise NotImplementedError(
			
 
				+                "mode[{}_model] is not implemented!".format(model_name))
			
 
				+
			
 
				+        if multiplier != 1.0:
			
 
				+            self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
			
 
				+            self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
			
 
				+            self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
			
 
				+            self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
			
 
				+            self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)
			
 
				+
			
 
				+        self.conv1 = ConvBNLayer(
			
 
				+            in_c=3,
			
 
				+            out_c=make_divisible(inplanes * scale),
			
 
				+            filter_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            num_groups=1,
			
 
				+            act="hard_swish",
			
 
				+            lr_mult=lr_mult_list[0],
			
 
				+            conv_decay=conv_decay,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            name="conv1")
			
 
				+
			
 
				+        self._out_channels = []
			
 
				+        self.block_list = []
			
 
				+        i = 0
			
 
				+        inplanes = make_divisible(inplanes * scale)
			
 
				+        for (k, exp, c, se, nl, s) in self.cfg:
			
 
				+            lr_idx = min(i // 3, len(lr_mult_list) - 1)
			
 
				+            lr_mult = lr_mult_list[lr_idx]
			
 
				+
			
 
				+            # for SSD/SSDLite, first head input is after ResidualUnit expand_conv
			
 
				+            return_list = self.with_extra_blocks and i + 2 in self.feature_maps
			
 
				+
			
 
				+            block = self.add_sublayer(
			
 
				+                "conv" + str(i + 2),
			
 
				+                sublayer=ResidualUnit(
			
 
				+                    in_c=inplanes,
			
 
				+                    mid_c=make_divisible(scale * exp),
			
 
				+                    out_c=make_divisible(scale * c),
			
 
				+                    filter_size=k,
			
 
				+                    stride=s,
			
 
				+                    use_se=se,
			
 
				+                    act=nl,
			
 
				+                    lr_mult=lr_mult,
			
 
				+                    conv_decay=conv_decay,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    return_list=return_list,
			
 
				+                    name="conv" + str(i + 2)))
			
 
				+            self.block_list.append(block)
			
 
				+            inplanes = make_divisible(scale * c)
			
 
				+            i += 1
			
 
				+            self._update_out_channels(
			
 
				+                make_divisible(scale * exp)
			
 
				+                if return_list else inplanes, i + 1, feature_maps)
			
 
				+
			
 
				+        if self.with_extra_blocks:
			
 
				+            self.extra_block_list = []
			
 
				+            extra_out_c = make_divisible(scale * self.cfg[-1][1])
			
 
				+            lr_idx = min(i // 3, len(lr_mult_list) - 1)
			
 
				+            lr_mult = lr_mult_list[lr_idx]
			
 
				+
			
 
				+            conv_extra = self.add_sublayer(
			
 
				+                "conv" + str(i + 2),
			
 
				+                sublayer=ConvBNLayer(
			
 
				+                    in_c=inplanes,
			
 
				+                    out_c=extra_out_c,
			
 
				+                    filter_size=1,
			
 
				+                    stride=1,
			
 
				+                    padding=0,
			
 
				+                    num_groups=1,
			
 
				+                    act="hard_swish",
			
 
				+                    lr_mult=lr_mult,
			
 
				+                    conv_decay=conv_decay,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    name="conv" + str(i + 2)))
			
 
				+            self.extra_block_list.append(conv_extra)
			
 
				+            i += 1
			
 
				+            self._update_out_channels(extra_out_c, i + 1, feature_maps)
			
 
				+
			
 
				+            for j, block_filter in enumerate(self.extra_block_filters):
			
 
				+                in_c = extra_out_c if j == 0 else self.extra_block_filters[
			
 
				+                    j - 1][1]
			
 
				+                conv_extra = self.add_sublayer(
			
 
				+                    "conv" + str(i + 2),
			
 
				+                    sublayer=ExtraBlockDW(
			
 
				+                        in_c,
			
 
				+                        block_filter[0],
			
 
				+                        block_filter[1],
			
 
				+                        stride=2,
			
 
				+                        lr_mult=lr_mult,
			
 
				+                        conv_decay=conv_decay,
			
 
				+                        norm_type=norm_type,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        name='conv' + str(i + 2)))
			
 
				+                self.extra_block_list.append(conv_extra)
			
 
				+                i += 1
			
 
				+                self._update_out_channels(block_filter[1], i + 1, feature_maps)
			
 
				+
			
 
				+    def _update_out_channels(self, channel, feature_idx, feature_maps):
			
 
				+        if feature_idx in feature_maps:
			
 
				+            self._out_channels.append(channel)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = self.conv1(inputs['image'])
			
 
				+        outs = []
			
 
				+        for idx, block in enumerate(self.block_list):
			
 
				+            x = block(x)
			
 
				+            if idx + 2 in self.feature_maps:
			
 
				+                if isinstance(x, list):
			
 
				+                    outs.append(x[0])
			
 
				+                    x = x[1]
			
 
				+                else:
			
 
				+                    outs.append(x)
			
 
				+
			
 
				+        if not self.with_extra_blocks:
			
 
				+            return outs
			
 
				+
			
 
				+        for i, block in enumerate(self.extra_block_list):
			
 
				+            idx = i + len(self.block_list)
			
 
				+            x = block(x)
			
 
				+            if idx + 2 in self.feature_maps:
			
 
				+                outs.append(x)
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]
			
--- a/paddlers/models/ppdet/modeling/backbones/name_adapter.py
+++ b/paddlers/models/ppdet/modeling/backbones/name_adapter.py
@@ -0,0 +1,69 @@
 
				+class NameAdapter(object):
			
 
				+    """Fix the backbones variable names for pretrained weight"""
			
 
				+
			
 
				+    def __init__(self, model):
			
 
				+        super(NameAdapter, self).__init__()
			
 
				+        self.model = model
			
 
				+
			
 
				+    @property
			
 
				+    def model_type(self):
			
 
				+        return getattr(self.model, '_model_type', '')
			
 
				+
			
 
				+    @property
			
 
				+    def variant(self):
			
 
				+        return getattr(self.model, 'variant', '')
			
 
				+
			
 
				+    def fix_conv_norm_name(self, name):
			
 
				+        if name == "conv1":
			
 
				+            bn_name = "bn_" + name
			
 
				+        else:
			
 
				+            bn_name = "bn" + name[3:]
			
 
				+        # the naming rule is same as pretrained weight
			
 
				+        if self.model_type == 'SEResNeXt':
			
 
				+            bn_name = name + "_bn"
			
 
				+        return bn_name
			
 
				+
			
 
				+    def fix_shortcut_name(self, name):
			
 
				+        if self.model_type == 'SEResNeXt':
			
 
				+            name = 'conv' + name + '_prj'
			
 
				+        return name
			
 
				+
			
 
				+    def fix_bottleneck_name(self, name):
			
 
				+        if self.model_type == 'SEResNeXt':
			
 
				+            conv_name1 = 'conv' + name + '_x1'
			
 
				+            conv_name2 = 'conv' + name + '_x2'
			
 
				+            conv_name3 = 'conv' + name + '_x3'
			
 
				+            shortcut_name = name
			
 
				+        else:
			
 
				+            conv_name1 = name + "_branch2a"
			
 
				+            conv_name2 = name + "_branch2b"
			
 
				+            conv_name3 = name + "_branch2c"
			
 
				+            shortcut_name = name + "_branch1"
			
 
				+        return conv_name1, conv_name2, conv_name3, shortcut_name
			
 
				+
			
 
				+    def fix_basicblock_name(self, name):
			
 
				+        if self.model_type == 'SEResNeXt':
			
 
				+            conv_name1 = 'conv' + name + '_x1'
			
 
				+            conv_name2 = 'conv' + name + '_x2'
			
 
				+            shortcut_name = name
			
 
				+        else:
			
 
				+            conv_name1 = name + "_branch2a"
			
 
				+            conv_name2 = name + "_branch2b"
			
 
				+            shortcut_name = name + "_branch1"
			
 
				+        return conv_name1, conv_name2, shortcut_name
			
 
				+
			
 
				+    def fix_layer_warp_name(self, stage_num, count, i):
			
 
				+        name = 'res' + str(stage_num)
			
 
				+        if count > 10 and stage_num == 4:
			
 
				+            if i == 0:
			
 
				+                conv_name = name + "a"
			
 
				+            else:
			
 
				+                conv_name = name + "b" + str(i)
			
 
				+        else:
			
 
				+            conv_name = name + chr(ord("a") + i)
			
 
				+        if self.model_type == 'SEResNeXt':
			
 
				+            conv_name = str(stage_num + 2) + '_' + str(i + 1)
			
 
				+        return conv_name
			
 
				+
			
 
				+    def fix_c1_stage_name(self):
			
 
				+        return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
			
--- a/paddlers/models/ppdet/modeling/backbones/res2net.py
+++ b/paddlers/models/ppdet/modeling/backbones/res2net.py
@@ -0,0 +1,358 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from numbers import Integral
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from .resnet import ConvNormLayer
			
 
				+
			
 
# Names exported by this module.
__all__ = ['Res2Net', 'Res2NetC5']

# Number of bottleneck blocks in each of the four stages, keyed by network
# depth. Res2Net looks this up by ``depth`` and passes entry ``i`` as the
# block count of stage ``i``.
Res2Net_cfg = {
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
    200: [3, 12, 48, 3]
}
			
 
				+
			
 
				+
			
 
				+class BottleNeck(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 stride,
			
 
				+                 shortcut,
			
 
				+                 width,
			
 
				+                 scales=4,
			
 
				+                 variant='b',
			
 
				+                 groups=1,
			
 
				+                 lr=1.0,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 dcn_v2=False):
			
 
				+        super(BottleNeck, self).__init__()
			
 
				+
			
 
				+        self.shortcut = shortcut
			
 
				+        self.scales = scales
			
 
				+        self.stride = stride
			
 
				+        if not shortcut:
			
 
				+            if variant == 'd' and stride == 2:
			
 
				+                self.branch1 = nn.Sequential()
			
 
				+                self.branch1.add_sublayer(
			
 
				+                    'pool',
			
 
				+                    nn.AvgPool2D(
			
 
				+                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
			
 
				+                self.branch1.add_sublayer(
			
 
				+                    'conv',
			
 
				+                    ConvNormLayer(
			
 
				+                        ch_in=ch_in,
			
 
				+                        ch_out=ch_out,
			
 
				+                        filter_size=1,
			
 
				+                        stride=1,
			
 
				+                        norm_type=norm_type,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        lr=lr))
			
 
				+            else:
			
 
				+                self.branch1 = ConvNormLayer(
			
 
				+                    ch_in=ch_in,
			
 
				+                    ch_out=ch_out,
			
 
				+                    filter_size=1,
			
 
				+                    stride=stride,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    lr=lr)
			
 
				+
			
 
				+        self.branch2a = ConvNormLayer(
			
 
				+            ch_in=ch_in,
			
 
				+            ch_out=width * scales,
			
 
				+            filter_size=1,
			
 
				+            stride=stride if variant == 'a' else 1,
			
 
				+            groups=1,
			
 
				+            act='relu',
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            lr=lr)
			
 
				+
			
 
				+        self.branch2b = nn.LayerList([
			
 
				+            ConvNormLayer(
			
 
				+                ch_in=width,
			
 
				+                ch_out=width,
			
 
				+                filter_size=3,
			
 
				+                stride=1 if variant == 'a' else stride,
			
 
				+                groups=groups,
			
 
				+                act='relu',
			
 
				+                norm_type=norm_type,
			
 
				+                norm_decay=norm_decay,
			
 
				+                freeze_norm=freeze_norm,
			
 
				+                lr=lr,
			
 
				+                dcn_v2=dcn_v2) for _ in range(self.scales - 1)
			
 
				+        ])
			
 
				+
			
 
				+        self.branch2c = ConvNormLayer(
			
 
				+            ch_in=width * scales,
			
 
				+            ch_out=ch_out,
			
 
				+            filter_size=1,
			
 
				+            stride=1,
			
 
				+            groups=1,
			
 
				+            norm_type=norm_type,
			
 
				+            norm_decay=norm_decay,
			
 
				+            freeze_norm=freeze_norm,
			
 
				+            lr=lr)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+
			
 
				+        out = self.branch2a(inputs)
			
 
				+        feature_split = paddle.split(out, self.scales, 1)
			
 
				+        out_split = []
			
 
				+        for i in range(self.scales - 1):
			
 
				+            if i == 0 or self.stride == 2:
			
 
				+                out_split.append(self.branch2b[i](feature_split[i]))
			
 
				+            else:
			
 
				+                out_split.append(self.branch2b[i](paddle.add(feature_split[i],
			
 
				+                                                             out_split[-1])))
			
 
				+        if self.stride == 1:
			
 
				+            out_split.append(feature_split[-1])
			
 
				+        else:
			
 
				+            out_split.append(
			
 
				+                F.avg_pool2d(feature_split[-1], 3, self.stride, 1))
			
 
				+        out = self.branch2c(paddle.concat(out_split, 1))
			
 
				+
			
 
				+        if self.shortcut:
			
 
				+            short = inputs
			
 
				+        else:
			
 
				+            short = self.branch1(inputs)
			
 
				+
			
 
				+        out = paddle.add(out, short)
			
 
				+        out = F.relu(out)
			
 
				+
			
 
				+        return out
			
 
				+
			
 
				+
			
 
				+class Blocks(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 ch_in,
			
 
				+                 ch_out,
			
 
				+                 count,
			
 
				+                 stage_num,
			
 
				+                 width,
			
 
				+                 scales=4,
			
 
				+                 variant='b',
			
 
				+                 groups=1,
			
 
				+                 lr=1.0,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 dcn_v2=False):
			
 
				+        super(Blocks, self).__init__()
			
 
				+
			
 
				+        self.blocks = nn.Sequential()
			
 
				+        for i in range(count):
			
 
				+            self.blocks.add_sublayer(
			
 
				+                str(i),
			
 
				+                BottleNeck(
			
 
				+                    ch_in=ch_in if i == 0 else ch_out,
			
 
				+                    ch_out=ch_out,
			
 
				+                    stride=2 if i == 0 and stage_num != 2 else 1,
			
 
				+                    shortcut=False if i == 0 else True,
			
 
				+                    width=width * (2**(stage_num - 2)),
			
 
				+                    scales=scales,
			
 
				+                    variant=variant,
			
 
				+                    groups=groups,
			
 
				+                    lr=lr,
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    dcn_v2=dcn_v2))
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        return self.blocks(inputs)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class Res2Net(nn.Layer):
			
 
				+    """
			
 
				+    Res2Net, see https://arxiv.org/abs/1904.01169
			
 
				+    Args:
			
 
				+        depth (int): Res2Net depth, should be 50, 101, 152, 200.
			
 
				+        width (int): Res2Net width
			
 
				+        scales (int): Res2Net scale
			
 
				+        variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently
			
 
				+        lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
			
 
				+                             lower learning rate ratio is need for pretrained model
			
 
				+                             got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
			
 
				+        groups (int): The groups number of the Conv Layer.
			
 
				+        norm_type (str): normalization type, 'bn' or 'sync_bn'
			
 
				+        norm_decay (float): weight decay for normalization layer weights
			
 
				+        freeze_norm (bool): freeze normalization layers
			
 
				+        freeze_at (int): freeze the backbone at which stage
			
 
				+        return_idx (list): index of stages whose feature maps are returned,
			
 
				+                           index 0 stands for res2
			
 
				+        dcn_v2_stages (list): index of stages who select deformable conv v2
			
 
				+        num_stages (int): number of stages created
			
 
				+
			
 
				+    """
			
 
				+    __shared__ = ['norm_type']
			
 
				+
			
 
				+    def __init__(self,
			
 
				+                 depth=50,
			
 
				+                 width=26,
			
 
				+                 scales=4,
			
 
				+                 variant='b',
			
 
				+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
			
 
				+                 groups=1,
			
 
				+                 norm_type='bn',
			
 
				+                 norm_decay=0.,
			
 
				+                 freeze_norm=True,
			
 
				+                 freeze_at=0,
			
 
				+                 return_idx=[0, 1, 2, 3],
			
 
				+                 dcn_v2_stages=[-1],
			
 
				+                 num_stages=4):
			
 
				+        super(Res2Net, self).__init__()
			
 
				+
			
 
				+        self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'
			
 
				+
			
 
				+        assert depth in [50, 101, 152, 200], \
			
 
				+            "depth {} not in [50, 101, 152, 200]"
			
 
				+        assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
			
 
				+        assert num_stages >= 1 and num_stages <= 4
			
 
				+
			
 
				+        self.depth = depth
			
 
				+        self.variant = variant
			
 
				+        self.norm_type = norm_type
			
 
				+        self.norm_decay = norm_decay
			
 
				+        self.freeze_norm = freeze_norm
			
 
				+        self.freeze_at = freeze_at
			
 
				+        if isinstance(return_idx, Integral):
			
 
				+            return_idx = [return_idx]
			
 
				+        assert max(return_idx) < num_stages, \
			
 
				+            'the maximum return index must smaller than num_stages, ' \
			
 
				+            'but received maximum return index is {} and num_stages ' \
			
 
				+            'is {}'.format(max(return_idx), num_stages)
			
 
				+        self.return_idx = return_idx
			
 
				+        self.num_stages = num_stages
			
 
				+        assert len(lr_mult_list) == 4, \
			
 
				+            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
			
 
				+        if isinstance(dcn_v2_stages, Integral):
			
 
				+            dcn_v2_stages = [dcn_v2_stages]
			
 
				+        assert max(dcn_v2_stages) < num_stages
			
 
				+        self.dcn_v2_stages = dcn_v2_stages
			
 
				+
			
 
				+        block_nums = Res2Net_cfg[depth]
			
 
				+
			
 
				+        # C1 stage
			
 
				+        if self.variant in ['c', 'd']:
			
 
				+            conv_def = [
			
 
				+                [3, 32, 3, 2, "conv1_1"],
			
 
				+                [32, 32, 3, 1, "conv1_2"],
			
 
				+                [32, 64, 3, 1, "conv1_3"],
			
 
				+            ]
			
 
				+        else:
			
 
				+            conv_def = [[3, 64, 7, 2, "conv1"]]
			
 
				+        self.res1 = nn.Sequential()
			
 
				+        for (c_in, c_out, k, s, _name) in conv_def:
			
 
				+            self.res1.add_sublayer(
			
 
				+                _name,
			
 
				+                ConvNormLayer(
			
 
				+                    ch_in=c_in,
			
 
				+                    ch_out=c_out,
			
 
				+                    filter_size=k,
			
 
				+                    stride=s,
			
 
				+                    groups=1,
			
 
				+                    act='relu',
			
 
				+                    norm_type=norm_type,
			
 
				+                    norm_decay=norm_decay,
			
 
				+                    freeze_norm=freeze_norm,
			
 
				+                    lr=1.0))
			
 
				+
			
 
				+        self._in_channels = [64, 256, 512, 1024]
			
 
				+        self._out_channels = [256, 512, 1024, 2048]
			
 
				+        self._out_strides = [4, 8, 16, 32]
			
 
				+
			
 
				+        # C2-C5 stages
			
 
				+        self.res_layers = []
			
 
				+        for i in range(num_stages):
			
 
				+            lr_mult = lr_mult_list[i]
			
 
				+            stage_num = i + 2
			
 
				+            self.res_layers.append(
			
 
				+                self.add_sublayer(
			
 
				+                    "res{}".format(stage_num),
			
 
				+                    Blocks(
			
 
				+                        self._in_channels[i],
			
 
				+                        self._out_channels[i],
			
 
				+                        count=block_nums[i],
			
 
				+                        stage_num=stage_num,
			
 
				+                        width=width,
			
 
				+                        scales=scales,
			
 
				+                        groups=groups,
			
 
				+                        lr=lr_mult,
			
 
				+                        norm_type=norm_type,
			
 
				+                        norm_decay=norm_decay,
			
 
				+                        freeze_norm=freeze_norm,
			
 
				+                        dcn_v2=(i in self.dcn_v2_stages))))
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [
			
 
				+            ShapeSpec(
			
 
				+                channels=self._out_channels[i], stride=self._out_strides[i])
			
 
				+            for i in self.return_idx
			
 
				+        ]
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x = inputs['image']
			
 
				+        res1 = self.res1(x)
			
 
				+        x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
			
 
				+        outs = []
			
 
				+        for idx, stage in enumerate(self.res_layers):
			
 
				+            x = stage(x)
			
 
				+            if idx == self.freeze_at:
			
 
				+                x.stop_gradient = True
			
 
				+            if idx in self.return_idx:
			
 
				+                outs.append(x)
			
 
				+        return outs
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+class Res2NetC5(nn.Layer):
			
 
				+    def __init__(self, depth=50, width=26, scales=4, variant='b'):
			
 
				+        super(Res2NetC5, self).__init__()
			
 
				+        feat_in, feat_out = [1024, 2048]
			
 
				+        self.res5 = Blocks(
			
 
				+            feat_in,
			
 
				+            feat_out,
			
 
				+            count=3,
			
 
				+            stage_num=5,
			
 
				+            width=width,
			
 
				+            scales=scales,
			
 
				+            variant=variant)
			
 
				+        self.feat_out = feat_out
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(
			
 
				+            channels=self.feat_out,
			
 
				+            stride=32, )]
			
 
				+
			
 
				+    def forward(self, roi_feat, stage=0):
			
 
				+        y = self.res5(roi_feat)
			
 
				+        return y
			
--- a/paddlers/models/ppdet/modeling/backbones/resnet.py
+++ b/paddlers/models/ppdet/modeling/backbones/resnet.py
@@ -0,0 +1,609 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import math
			
 
				+from numbers import Integral
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+import paddle.nn.functional as F
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from paddle.regularizer import L2Decay
			
 
				+from paddle.nn.initializer import Uniform
			
 
				+from paddle import ParamAttr
			
 
				+from paddle.nn.initializer import Constant
			
 
				+from paddle.vision.ops import DeformConv2D
			
 
				+from .name_adapter import NameAdapter
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+
			
 
				+__all__ = ['ResNet', 'Res5Head', 'Blocks', 'BasicBlock', 'BottleNeck']
			
 
				+
			
 
				# Residual-block count of each of the four stages, keyed by network depth.
				ResNet_cfg = {
				    depth: list(block_counts)
				    for depth, block_counts in (
				        (18, (2, 2, 2, 2)),
				        (34, (3, 4, 6, 3)),
				        (50, (3, 4, 6, 3)),
				        (101, (3, 4, 23, 3)),
				        (152, (3, 8, 36, 3)),
				    )
				}
			
 
				+
			
 
				+
			
 
				class ConvNormLayer(nn.Layer):
				    """Convolution (plain or deformable v2), then batch norm, then an optional
				    activation."""

				    def __init__(self,
				                 ch_in,
				                 ch_out,
				                 filter_size,
				                 stride,
				                 groups=1,
				                 act=None,
				                 norm_type='bn',
				                 norm_decay=0.,
				                 freeze_norm=True,
				                 lr=1.0,
				                 dcn_v2=False):
				        """
				        Args:
				            ch_in (int): input channels of the convolution.
				            ch_out (int): output channels of the convolution and width of
				                the norm layer.
				            filter_size (int): square kernel size; the padding used is
				                ``(filter_size - 1) // 2``.
				            stride (int): convolution stride.
				            groups (int): convolution groups.
				            act (str|None): name of a function in ``paddle.nn.functional``
				                applied after the norm; a falsy value skips it.
				            norm_type (str): 'bn' or 'sync_bn'; both build
				                ``nn.BatchNorm2D`` here.
				            norm_decay (float): L2 decay coefficient of the norm scale/bias.
				            freeze_norm (bool): when true the norm parameters get learning
				                rate 0, are created non-trainable, use global statistics and
				                are flagged with ``stop_gradient``.
				            lr (float): learning rate of the conv weight (and of the norm
				                parameters when they are not frozen).
				            dcn_v2 (bool): use ``DeformConv2D`` fed by a learned offset/mask
				                branch instead of ``nn.Conv2D``.
				        """
				        super(ConvNormLayer, self).__init__()
				        assert norm_type in ['bn', 'sync_bn']
				        self.norm_type = norm_type
				        self.act = act
				        self.dcn_v2 = dcn_v2

				        same_pad = (filter_size - 1) // 2
				        if self.dcn_v2:
				            kernel_area = filter_size**2
				            self.offset_channel = 2 * kernel_area
				            self.mask_channel = kernel_area

				            # One zero-initialised conv predicts offsets and mask together;
				            # forward() splits its output along the channel axis.
				            self.conv_offset = nn.Conv2D(
				                in_channels=ch_in,
				                out_channels=3 * kernel_area,
				                kernel_size=filter_size,
				                stride=stride,
				                padding=same_pad,
				                weight_attr=ParamAttr(initializer=Constant(0.)),
				                bias_attr=ParamAttr(initializer=Constant(0.)))
				            self.conv = DeformConv2D(
				                in_channels=ch_in,
				                out_channels=ch_out,
				                kernel_size=filter_size,
				                stride=stride,
				                padding=same_pad,
				                dilation=1,
				                groups=groups,
				                weight_attr=ParamAttr(learning_rate=lr),
				                bias_attr=False)
				        else:
				            self.conv = nn.Conv2D(
				                in_channels=ch_in,
				                out_channels=ch_out,
				                kernel_size=filter_size,
				                stride=stride,
				                padding=same_pad,
				                groups=groups,
				                weight_attr=ParamAttr(learning_rate=lr),
				                bias_attr=False)

				        norm_lr = 0. if freeze_norm else lr
				        trainable = not freeze_norm

				        def _norm_attr():
				            # Scale and bias each get their own ParamAttr / L2Decay object.
				            return ParamAttr(
				                learning_rate=norm_lr,
				                regularizer=L2Decay(norm_decay),
				                trainable=trainable)

				        param_attr = _norm_attr()
				        bias_attr = _norm_attr()

				        if norm_type in ['sync_bn', 'bn']:
				            self.norm = nn.BatchNorm2D(
				                ch_out,
				                weight_attr=param_attr,
				                bias_attr=bias_attr,
				                use_global_stats=True if freeze_norm else None)
				        norm_params = self.norm.parameters()

				        if freeze_norm:
				            for norm_param in norm_params:
				                norm_param.stop_gradient = True

				    def forward(self, inputs):
				        """Apply conv -> norm -> optional activation to ``inputs``."""
				        if self.dcn_v2:
				            offset_mask = self.conv_offset(inputs)
				            offset, mask = paddle.split(
				                offset_mask,
				                num_or_sections=[self.offset_channel, self.mask_channel],
				                axis=1)
				            mask = F.sigmoid(mask)
				            out = self.conv(inputs, offset, mask=mask)
				        else:
				            out = self.conv(inputs)

				        if self.norm_type in ['bn', 'sync_bn']:
				            out = self.norm(out)
				        if self.act:
				            activation = getattr(F, self.act)
				            out = activation(out)
				        return out
			
 
				+
			
 
				+
			
 
				class SELayer(nn.Layer):
				    """Squeeze-and-excitation gate: global average pool, two linear layers,
				    then a per-channel rescaling of the input."""

				    def __init__(self, ch, reduction_ratio=16):
				        """
				        Args:
				            ch (int): number of input (and output) channels.
				            reduction_ratio (int): the hidden width is
				                ``ch // reduction_ratio``.
				        """
				        super(SELayer, self).__init__()
				        self.pool = nn.AdaptiveAvgPool2D(1)
				        hidden = ch // reduction_ratio

				        # Each linear layer is initialised uniformly in +-1/sqrt(fan_in).
				        squeeze_bound = 1.0 / math.sqrt(ch)
				        self.squeeze = nn.Linear(
				            ch,
				            hidden,
				            weight_attr=paddle.ParamAttr(
				                initializer=Uniform(-squeeze_bound, squeeze_bound)),
				            bias_attr=True)

				        extract_bound = 1.0 / math.sqrt(hidden)
				        self.extract = nn.Linear(
				            hidden,
				            ch,
				            weight_attr=paddle.ParamAttr(
				                initializer=Uniform(-extract_bound, extract_bound)),
				            bias_attr=True)

				    def forward(self, inputs):
				        """Return ``inputs`` multiplied by the learned per-channel gate."""
				        pooled = paddle.squeeze(self.pool(inputs), axis=[2, 3])
				        excited = F.relu(self.squeeze(pooled))
				        gate = F.sigmoid(self.extract(excited))
				        gate = paddle.unsqueeze(gate, axis=[2, 3])
				        return gate * inputs
			
 
				+
			
 
				+
			
 
				class BasicBlock(nn.Layer):
				    """Two-convolution residual block (3x3 -> 3x3) with an optional SE layer."""

				    # Output channels are ``ch_out * expansion``.
				    expansion = 1

				    def __init__(self,
				                 ch_in,
				                 ch_out,
				                 stride,
				                 shortcut,
				                 variant='b',
				                 groups=1,
				                 base_width=64,
				                 lr=1.0,
				                 norm_type='bn',
				                 norm_decay=0.,
				                 freeze_norm=True,
				                 dcn_v2=False,
				                 std_senet=False):
				        """
				        Args:
				            ch_in (int): input channels.
				            ch_out (int): output channels.
				            stride (int): stride of the first 3x3 conv and of the projection.
				            shortcut (bool): True adds the input unchanged; False builds a
				                projection branch (``self.short``).
				            variant (str): with 'd' and ``stride == 2`` the projection is
				                average pooling followed by a stride-1 1x1 conv.
				            groups (int): must be 1.
				            base_width (int): must be 64.
				            lr (float): learning rate handed to every ``ConvNormLayer``.
				            norm_type (str): handed to every ``ConvNormLayer``.
				            norm_decay (float): handed to every ``ConvNormLayer``.
				            freeze_norm (bool): handed to every ``ConvNormLayer``.
				            dcn_v2 (bool): use deformable conv in the second 3x3 conv.
				            std_senet (bool): append an ``SELayer`` after the second conv.
				        """
				        super(BasicBlock, self).__init__()
				        assert groups == 1 and base_width == 64, 'BasicBlock only supports groups=1 and base_width=64'

				        # Options shared by every ConvNormLayer of this block.
				        common = dict(
				            norm_type=norm_type,
				            norm_decay=norm_decay,
				            freeze_norm=freeze_norm,
				            lr=lr)

				        self.shortcut = shortcut
				        if not shortcut:
				            if variant == 'd' and stride == 2:
				                pool = nn.AvgPool2D(
				                    kernel_size=2, stride=2, padding=0, ceil_mode=True)
				                proj = ConvNormLayer(
				                    ch_in=ch_in,
				                    ch_out=ch_out,
				                    filter_size=1,
				                    stride=1,
				                    **common)
				                self.short = nn.Sequential()
				                self.short.add_sublayer('pool', pool)
				                self.short.add_sublayer('conv', proj)
				            else:
				                self.short = ConvNormLayer(
				                    ch_in=ch_in,
				                    ch_out=ch_out,
				                    filter_size=1,
				                    stride=stride,
				                    **common)

				        self.branch2a = ConvNormLayer(
				            ch_in=ch_in,
				            ch_out=ch_out,
				            filter_size=3,
				            stride=stride,
				            act='relu',
				            **common)

				        self.branch2b = ConvNormLayer(
				            ch_in=ch_out,
				            ch_out=ch_out,
				            filter_size=3,
				            stride=1,
				            act=None,
				            dcn_v2=dcn_v2,
				            **common)

				        self.std_senet = std_senet
				        if self.std_senet:
				            self.se = SELayer(ch_out)

				    def forward(self, inputs):
				        """Return ``relu(residual(inputs) + shortcut(inputs))``."""
				        residual = self.branch2b(self.branch2a(inputs))
				        if self.std_senet:
				            residual = self.se(residual)

				        identity = inputs if self.shortcut else self.short(inputs)

				        summed = paddle.add(x=residual, y=identity)
				        return F.relu(summed)
			
 
				+
			
 
				+
			
 
				class BottleNeck(nn.Layer):
				    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with an optional
				    SE layer."""

				    # Output channels are ``ch_out * expansion``.
				    expansion = 4

				    def __init__(self,
				                 ch_in,
				                 ch_out,
				                 stride,
				                 shortcut,
				                 variant='b',
				                 groups=1,
				                 base_width=4,
				                 lr=1.0,
				                 norm_type='bn',
				                 norm_decay=0.,
				                 freeze_norm=True,
				                 dcn_v2=False,
				                 std_senet=False):
				        """
				        Args:
				            ch_in (int): input channels.
				            ch_out (int): base width; the block outputs
				                ``ch_out * expansion`` channels.
				            stride (int): block stride; applied by the first 1x1 conv for
				                variant 'a', otherwise by the 3x3 conv.
				            shortcut (bool): True adds the input unchanged; False builds a
				                projection branch (``self.short``).
				            variant (str): see ``stride``; with 'd' and ``stride == 2`` the
				                projection is average pooling plus a stride-1 1x1 conv.
				            groups (int): groups of the 3x3 conv.
				            base_width (int): inner width is
				                ``int(ch_out * (base_width / 64.)) * groups``.
				            lr (float): learning rate handed to every ``ConvNormLayer``.
				            norm_type (str): handed to every ``ConvNormLayer``.
				            norm_decay (float): handed to every ``ConvNormLayer``.
				            freeze_norm (bool): handed to every ``ConvNormLayer``.
				            dcn_v2 (bool): use deformable conv in the 3x3 conv.
				            std_senet (bool): append an ``SELayer`` after the last conv.
				        """
				        super(BottleNeck, self).__init__()
				        stride1, stride2 = (stride, 1) if variant == 'a' else (1, stride)

				        # ResNeXt
				        width = int(ch_out * (base_width / 64.)) * groups
				        out_channels = ch_out * self.expansion

				        # Options shared by every ConvNormLayer of this block.
				        common = dict(
				            norm_type=norm_type,
				            norm_decay=norm_decay,
				            freeze_norm=freeze_norm,
				            lr=lr)

				        self.shortcut = shortcut
				        if not shortcut:
				            if variant == 'd' and stride == 2:
				                pool = nn.AvgPool2D(
				                    kernel_size=2, stride=2, padding=0, ceil_mode=True)
				                proj = ConvNormLayer(
				                    ch_in=ch_in,
				                    ch_out=out_channels,
				                    filter_size=1,
				                    stride=1,
				                    **common)
				                self.short = nn.Sequential()
				                self.short.add_sublayer('pool', pool)
				                self.short.add_sublayer('conv', proj)
				            else:
				                self.short = ConvNormLayer(
				                    ch_in=ch_in,
				                    ch_out=out_channels,
				                    filter_size=1,
				                    stride=stride,
				                    **common)

				        self.branch2a = ConvNormLayer(
				            ch_in=ch_in,
				            ch_out=width,
				            filter_size=1,
				            stride=stride1,
				            groups=1,
				            act='relu',
				            **common)

				        self.branch2b = ConvNormLayer(
				            ch_in=width,
				            ch_out=width,
				            filter_size=3,
				            stride=stride2,
				            groups=groups,
				            act='relu',
				            dcn_v2=dcn_v2,
				            **common)

				        self.branch2c = ConvNormLayer(
				            ch_in=width,
				            ch_out=out_channels,
				            filter_size=1,
				            stride=1,
				            groups=1,
				            **common)

				        self.std_senet = std_senet
				        if self.std_senet:
				            self.se = SELayer(out_channels)

				    def forward(self, inputs):
				        """Return ``relu(residual(inputs) + shortcut(inputs))``."""
				        residual = self.branch2a(inputs)
				        residual = self.branch2b(residual)
				        residual = self.branch2c(residual)
				        if self.std_senet:
				            residual = self.se(residual)

				        identity = inputs if self.shortcut else self.short(inputs)

				        summed = paddle.add(x=residual, y=identity)
				        return F.relu(summed)
			
 
				+
			
 
				+
			
 
				class Blocks(nn.Layer):
				    """One residual stage: ``count`` blocks of type ``block`` run in sequence."""

				    def __init__(self,
				                 block,
				                 ch_in,
				                 ch_out,
				                 count,
				                 name_adapter,
				                 stage_num,
				                 variant='b',
				                 groups=1,
				                 base_width=64,
				                 lr=1.0,
				                 norm_type='bn',
				                 norm_decay=0.,
				                 freeze_norm=True,
				                 dcn_v2=False,
				                 std_senet=False):
				        """
				        Args:
				            block (type): block class to instantiate; must expose an
				                ``expansion`` attribute.
				            ch_in (int): input channels of the first block.
				            ch_out (int): ``ch_out`` handed to every block.
				            count (int): number of blocks in the stage.
				            name_adapter: object whose ``fix_layer_warp_name(stage_num,
				                count, i)`` supplies each block's sublayer name.
				            stage_num (int): stage number; the first block uses stride 2
				                unless ``stage_num == 2``.
				            variant, groups, base_width, lr, norm_type, norm_decay,
				            freeze_norm, dcn_v2, std_senet: forwarded unchanged to every
				                block.
				        """
				        super(Blocks, self).__init__()

				        self.blocks = []
				        for i in range(count):
				            is_first = i == 0
				            layer_name = name_adapter.fix_layer_warp_name(stage_num, count, i)
				            # Only the first block may downsample and needs a projection.
				            new_block = block(
				                ch_in=ch_in,
				                ch_out=ch_out,
				                stride=2 if (is_first and stage_num != 2) else 1,
				                shortcut=not is_first,
				                variant=variant,
				                groups=groups,
				                base_width=base_width,
				                lr=lr,
				                norm_type=norm_type,
				                norm_decay=norm_decay,
				                freeze_norm=freeze_norm,
				                dcn_v2=dcn_v2,
				                std_senet=std_senet)
				            self.blocks.append(self.add_sublayer(layer_name, new_block))
				            if is_first:
				                # Later blocks consume the first block's output width.
				                ch_in = ch_out * block.expansion

				    def forward(self, inputs):
				        """Feed ``inputs`` through every block in order and return the result."""
				        feat = inputs
				        for stage_block in self.blocks:
				            feat = stage_block(feat)
				        return feat
			
 
				+
			
 
				+
			
 
				@register
				@serializable
				class ResNet(nn.Layer):
				    __shared__ = ['norm_type']

				    def __init__(self,
				                 depth=50,
				                 ch_in=64,
				                 variant='b',
				                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
				                 groups=1,
				                 base_width=64,
				                 norm_type='bn',
				                 norm_decay=0,
				                 freeze_norm=True,
				                 freeze_at=0,
				                 return_idx=[0, 1, 2, 3],
				                 dcn_v2_stages=[-1],
				                 num_stages=4,
				                 std_senet=False):
				        """
				        Residual Network, see https://arxiv.org/abs/1512.03385

				        Args:
				            depth (int): ResNet depth, should be 18, 34, 50, 101, 152
				                (the keys of ``ResNet_cfg``).
				            ch_in (int): output channels of the stem (``conv1``), which is
				                also the input width of the first residual stage, default 64
				            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
				            lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
				                                 lower learning rate ratio is need for pretrained model
				                                 got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
				                                 Must have exactly 4 entries.
				            groups (int): group convolution cardinality
				            base_width (int): base width of each group convolution
				            norm_type (str): normalization type, 'bn' or 'sync_bn'
				                (the only values ``ConvNormLayer`` accepts)
				            norm_decay (float): weight decay for normalization layer weights
				            freeze_norm (bool): freeze normalization layers
				            freeze_at (int): when >= 0, the stem and the stages with index
				                ``0..freeze_at`` get ``stop_gradient`` set on their parameters
				            return_idx (int|list): index of the stages whose feature maps are returned
				            dcn_v2_stages (int|list): index of stages who select deformable conv v2
				            num_stages (int): total num of stages, between 1 and 4
				            std_senet (bool): whether use senet, default False
				        """
				        super(ResNet, self).__init__()
				        self._model_type = 'ResNet' if groups == 1 else 'ResNeXt'
				        assert 1 <= num_stages <= 4
				        self.depth = depth
				        self.variant = variant
				        self.groups = groups
				        self.base_width = base_width
				        self.norm_type = norm_type
				        self.norm_decay = norm_decay
				        self.freeze_norm = freeze_norm
				        self.freeze_at = freeze_at
				        if isinstance(return_idx, Integral):
				            return_idx = [return_idx]
				        assert max(return_idx) < num_stages, \
				            'the maximum return index must smaller than num_stages, ' \
				            'but received maximum return index is {} and num_stages ' \
				            'is {}'.format(max(return_idx), num_stages)
				        self.return_idx = return_idx
				        self.num_stages = num_stages
				        assert len(lr_mult_list) == 4, \
				            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
				        # A bare int is accepted as shorthand for a one-element list.
				        # (The original repeated this normalisation and check twice.)
				        if isinstance(dcn_v2_stages, Integral):
				            dcn_v2_stages = [dcn_v2_stages]
				        assert max(dcn_v2_stages) < num_stages
				        self.dcn_v2_stages = dcn_v2_stages

				        block_nums = ResNet_cfg[depth]
				        na = NameAdapter(self)

				        # Stem: variants 'c'/'d' use three 3x3 convs, the others one 7x7 conv.
				        conv1_name = na.fix_c1_stage_name()
				        if variant in ['c', 'd']:
				            conv_def = [
				                [3, ch_in // 2, 3, 2, "conv1_1"],
				                [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
				                [ch_in // 2, ch_in, 3, 1, "conv1_3"],
				            ]
				        else:
				            conv_def = [[3, ch_in, 7, 2, conv1_name]]
				        self.conv1 = nn.Sequential()
				        for (c_in, c_out, k, s, _name) in conv_def:
				            self.conv1.add_sublayer(
				                _name,
				                ConvNormLayer(
				                    ch_in=c_in,
				                    ch_out=c_out,
				                    filter_size=k,
				                    stride=s,
				                    groups=1,
				                    act='relu',
				                    norm_type=norm_type,
				                    norm_decay=norm_decay,
				                    freeze_norm=freeze_norm,
				                    lr=1.0))

				        self.ch_in = ch_in
				        ch_out_list = [64, 128, 256, 512]
				        block = BottleNeck if depth >= 50 else BasicBlock

				        self._out_channels = [block.expansion * v for v in ch_out_list]
				        self._out_strides = [4, 8, 16, 32]

				        self.res_layers = []
				        for i in range(num_stages):
				            lr_mult = lr_mult_list[i]
				            stage_num = i + 2
				            res_name = "res{}".format(stage_num)
				            res_layer = self.add_sublayer(
				                res_name,
				                Blocks(
				                    block,
				                    self.ch_in,
				                    ch_out_list[i],
				                    count=block_nums[i],
				                    name_adapter=na,
				                    stage_num=stage_num,
				                    variant=variant,
				                    groups=groups,
				                    base_width=base_width,
				                    lr=lr_mult,
				                    norm_type=norm_type,
				                    norm_decay=norm_decay,
				                    freeze_norm=freeze_norm,
				                    dcn_v2=(i in self.dcn_v2_stages),
				                    std_senet=std_senet))
				            self.res_layers.append(res_layer)
				            # The next stage consumes this stage's output width.
				            self.ch_in = self._out_channels[i]

				        if freeze_at >= 0:
				            self._freeze_parameters(self.conv1)
				            for i in range(min(freeze_at + 1, num_stages)):
				                self._freeze_parameters(self.res_layers[i])

				    def _freeze_parameters(self, m):
				        """Set ``stop_gradient = True`` on every parameter of layer ``m``."""
				        for p in m.parameters():
				            p.stop_gradient = True

				    @property
				    def out_shape(self):
				        """Return one ``ShapeSpec`` (channels, stride) per index in
				        ``self.return_idx``, in that order."""
				        return [
				            ShapeSpec(
				                channels=self._out_channels[i], stride=self._out_strides[i])
				            for i in self.return_idx
				        ]

				    def forward(self, inputs):
				        """Run the backbone on ``inputs['image']``.

				        Returns:
				            list: outputs of the stages whose index is in
				                ``self.return_idx``, in stage order.
				        """
				        x = inputs['image']
				        conv1 = self.conv1(x)
				        x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
				        outs = []
				        for idx, stage in enumerate(self.res_layers):
				            x = stage(x)
				            if idx in self.return_idx:
				                outs.append(x)
				        return outs
			
 
				+
			
 
				+
			
 
				@register
				class Res5Head(nn.Layer):
				    """Head layer that applies a stage-5 ``Blocks`` group to its input."""

				    def __init__(self, depth=50):
				        """
				        Args:
				            depth (int): depths >= 50 use ``BottleNeck`` blocks with 1024
				                input channels; smaller depths use ``BasicBlock`` with 256.
				        """
				        super(Res5Head, self).__init__()
				        feat_out = 512
				        feat_in = 256 if depth < 50 else 1024
				        block = BottleNeck if depth >= 50 else BasicBlock
				        na = NameAdapter(self)
				        self.res5 = Blocks(
				            block,
				            feat_in,
				            feat_out,
				            count=3,
				            name_adapter=na,
				            stage_num=5)
				        # Reported channel count: 512 below depth 50, otherwise 512 * 4.
				        if depth < 50:
				            self.feat_out = feat_out
				        else:
				            self.feat_out = feat_out * 4

				    @property
				    def out_shape(self):
				        """Return a one-element list holding the head's ``ShapeSpec``
				        (``self.feat_out`` channels, stride 16)."""
				        spec = ShapeSpec(channels=self.feat_out, stride=16)
				        return [spec]

				    def forward(self, roi_feat, stage=0):
				        """Apply the stage-5 blocks to ``roi_feat``; ``stage`` is ignored."""
				        return self.res5(roi_feat)
			
--- a/paddlers/models/ppdet/modeling/backbones/senet.py
+++ b/paddlers/models/ppdet/modeling/backbones/senet.py
@@ -0,0 +1,139 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import paddle.nn as nn
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from .resnet import ResNet, Blocks, BasicBlock, BottleNeck
			
 
				+
			
 
				+__all__ = ['SENet', 'SERes5Head']
			
 
				+
			
 
				+
			
 
				@register
				@serializable
				class SENet(ResNet):
				    __shared__ = ['norm_type']

				    def __init__(self,
				                 depth=50,
				                 variant='b',
				                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
				                 groups=1,
				                 base_width=64,
				                 norm_type='bn',
				                 norm_decay=0,
				                 freeze_norm=True,
				                 freeze_at=0,
				                 return_idx=[0, 1, 2, 3],
				                 dcn_v2_stages=[-1],
				                 std_senet=True,
				                 num_stages=4):
				        """
				        Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507

				        A ``ResNet`` built with ``ch_in=128`` and, by default,
				        ``std_senet=True``. Every argument is handed to ``ResNet.__init__``
				        unchanged.

				        Args:
				            depth (int): network depth.
				            variant (str): ResNet variant.
				            lr_mult_list (list): learning rate ratio of the four stages.
				            groups (int): group convolution cardinality.
				            base_width (int): base width of each group convolution.
				            norm_type (str): normalization type.
				            norm_decay (float): weight decay for normalization layer weights.
				            freeze_norm (bool): freeze normalization layers.
				            freeze_at (int): freeze the backbone up to this stage.
				            return_idx (list): index of the stages whose feature maps are
				                returned.
				            dcn_v2_stages (list): index of the stages using deformable conv v2.
				            std_senet (bool): whether to use SE layers, default True.
				            num_stages (int): total number of stages.
				        """
				        resnet_kwargs = dict(
				            depth=depth,
				            variant=variant,
				            lr_mult_list=lr_mult_list,
				            groups=groups,
				            base_width=base_width,
				            norm_type=norm_type,
				            norm_decay=norm_decay,
				            freeze_norm=freeze_norm,
				            freeze_at=freeze_at,
				            return_idx=return_idx,
				            dcn_v2_stages=dcn_v2_stages,
				            std_senet=std_senet,
				            num_stages=num_stages)
				        # The stem width is fixed at 128 for this backbone.
				        super(SENet, self).__init__(ch_in=128, **resnet_kwargs)
			
 
				+
			
 
				+
			
 
				@register
				class SERes5Head(nn.Layer):
				    def __init__(self,
				                 depth=50,
				                 variant='b',
				                 lr_mult=1.0,
				                 groups=1,
				                 base_width=64,
				                 norm_type='bn',
				                 norm_decay=0,
				                 dcn_v2=False,
				                 freeze_norm=False,
				                 std_senet=True):
				        """
				        SERes5Head layer

				        Args:
				            depth (int): SENet depth, should be 50, 101, 152; depths below 50
				                select ``BasicBlock`` with 256 input channels instead of
				                ``BottleNeck`` with 1024
				            variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
				            lr_mult (float): learning rate ratio of SERes5Head, default as 1.0.
				            groups (int): group convolution cardinality
				            base_width (int): base width of each group convolution
				            norm_type (str): normalization type, forwarded to ``Blocks``
				            norm_decay (float): weight decay for normalization layer weights
				            dcn_v2 (bool): whether the blocks use deformable conv v2
				            freeze_norm (bool): freeze normalization layers, default False
				            std_senet (bool): whether use senet, default True

				        """
				        # This module's import section does not bring NameAdapter into
				        # scope, so it is imported here; without it the constructor raised
				        # NameError.
				        from .name_adapter import NameAdapter

				        super(SERes5Head, self).__init__()
				        ch_out = 512
				        ch_in = 256 if depth < 50 else 1024
				        na = NameAdapter(self)
				        block = BottleNeck if depth >= 50 else BasicBlock
				        self.res5 = Blocks(
				            block,
				            ch_in,
				            ch_out,
				            count=3,
				            name_adapter=na,
				            stage_num=5,
				            variant=variant,
				            groups=groups,
				            base_width=base_width,
				            lr=lr_mult,
				            norm_type=norm_type,
				            norm_decay=norm_decay,
				            freeze_norm=freeze_norm,
				            dcn_v2=dcn_v2,
				            std_senet=std_senet)
				        self.ch_out = ch_out * block.expansion

				    @property
				    def out_shape(self):
				        """Return a one-element list holding the head's ``ShapeSpec``
				        (``self.ch_out`` channels, stride 16)."""
				        # Imported locally for the same reason as NameAdapter in __init__.
				        from ..shape_spec import ShapeSpec

				        return [ShapeSpec(
				            channels=self.ch_out,
				            stride=16, )]

				    def forward(self, roi_feat):
				        """Apply the stage-5 blocks to ``roi_feat`` and return the result."""
				        y = self.res5(roi_feat)
				        return y
			
--- a/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
+++ b/paddlers/models/ppdet/modeling/backbones/shufflenet_v2.py
@@ -0,0 +1,251 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from __future__ import absolute_import
			
 
				+from __future__ import division
			
 
				+from __future__ import print_function
			
 
				+
			
 
				+import paddle
			
 
				+import paddle.nn as nn
			
 
				+from paddle import ParamAttr
			
 
				+import paddle.nn.functional as F
			
 
				+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm2D
			
 
				+from paddle.nn.initializer import KaimingNormal
			
 
				+from paddle.regularizer import L2Decay
			
 
				+
			
 
				+from paddlers.models.ppdet.core.workspace import register, serializable
			
 
				+from numbers import Integral
			
 
				+from ..shape_spec import ShapeSpec
			
 
				+from paddlers.models.ppdet.modeling.ops import channel_shuffle
			
 
				+
			
 
				+__all__ = ['ShuffleNetV2']
			
 
				+
			
 
				+
			
 
				+class ConvBNLayer(nn.Layer):
			
 
				+    def __init__(self,
			
 
				+                 in_channels,
			
 
				+                 out_channels,
			
 
				+                 kernel_size,
			
 
				+                 stride,
			
 
				+                 padding,
			
 
				+                 groups=1,
			
 
				+                 act=None):
			
 
				+        super(ConvBNLayer, self).__init__()
			
 
				+        self._conv = Conv2D(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=out_channels,
			
 
				+            kernel_size=kernel_size,
			
 
				+            stride=stride,
			
 
				+            padding=padding,
			
 
				+            groups=groups,
			
 
				+            weight_attr=ParamAttr(initializer=KaimingNormal()),
			
 
				+            bias_attr=False)
			
 
				+
			
 
				+        self._batch_norm = BatchNorm2D(
			
 
				+            out_channels,
			
 
				+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
			
 
				+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
			
 
				+        if act == "hard_swish":
			
 
				+            act = 'hardswish'
			
 
				+        self.act = act
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self._conv(inputs)
			
 
				+        y = self._batch_norm(y)
			
 
				+        if self.act:
			
 
				+            y = getattr(F, self.act)(y)
			
 
				+        return y
			
 
				+
			
 
				+
			
 
				+class InvertedResidual(nn.Layer):
			
 
				+    def __init__(self, in_channels, out_channels, stride, act="relu"):
			
 
				+        super(InvertedResidual, self).__init__()
			
 
				+        self._conv_pw = ConvBNLayer(
			
 
				+            in_channels=in_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        self._conv_dw = ConvBNLayer(
			
 
				+            in_channels=out_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=out_channels // 2,
			
 
				+            act=None)
			
 
				+        self._conv_linear = ConvBNLayer(
			
 
				+            in_channels=out_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x1, x2 = paddle.split(
			
 
				+            inputs,
			
 
				+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
			
 
				+            axis=1)
			
 
				+        x2 = self._conv_pw(x2)
			
 
				+        x2 = self._conv_dw(x2)
			
 
				+        x2 = self._conv_linear(x2)
			
 
				+        out = paddle.concat([x1, x2], axis=1)
			
 
				+        return channel_shuffle(out, 2)
			
 
				+
			
 
				+
			
 
				+class InvertedResidualDS(nn.Layer):
			
 
				+    def __init__(self, in_channels, out_channels, stride, act="relu"):
			
 
				+        super(InvertedResidualDS, self).__init__()
			
 
				+
			
 
				+        # branch1
			
 
				+        self._conv_dw_1 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=in_channels,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=in_channels,
			
 
				+            act=None)
			
 
				+        self._conv_linear_1 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        # branch2
			
 
				+        self._conv_pw_2 = ConvBNLayer(
			
 
				+            in_channels=in_channels,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+        self._conv_dw_2 = ConvBNLayer(
			
 
				+            in_channels=out_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=3,
			
 
				+            stride=stride,
			
 
				+            padding=1,
			
 
				+            groups=out_channels // 2,
			
 
				+            act=None)
			
 
				+        self._conv_linear_2 = ConvBNLayer(
			
 
				+            in_channels=out_channels // 2,
			
 
				+            out_channels=out_channels // 2,
			
 
				+            kernel_size=1,
			
 
				+            stride=1,
			
 
				+            padding=0,
			
 
				+            groups=1,
			
 
				+            act=act)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        x1 = self._conv_dw_1(inputs)
			
 
				+        x1 = self._conv_linear_1(x1)
			
 
				+        x2 = self._conv_pw_2(inputs)
			
 
				+        x2 = self._conv_dw_2(x2)
			
 
				+        x2 = self._conv_linear_2(x2)
			
 
				+        out = paddle.concat([x1, x2], axis=1)
			
 
				+
			
 
				+        return channel_shuffle(out, 2)
			
 
				+
			
 
				+
			
 
				+@register
			
 
				+@serializable
			
 
				+class ShuffleNetV2(nn.Layer):
			
 
				+    def __init__(self, scale=1.0, act="relu", feature_maps=[5, 13, 17]):
			
 
				+        super(ShuffleNetV2, self).__init__()
			
 
				+        self.scale = scale
			
 
				+        if isinstance(feature_maps, Integral):
			
 
				+            feature_maps = [feature_maps]
			
 
				+        self.feature_maps = feature_maps
			
 
				+        stage_repeats = [4, 8, 4]
			
 
				+
			
 
				+        if scale == 0.25:
			
 
				+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
			
 
				+        elif scale == 0.33:
			
 
				+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
			
 
				+        elif scale == 0.5:
			
 
				+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
			
 
				+        elif scale == 1.0:
			
 
				+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
			
 
				+        elif scale == 1.5:
			
 
				+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
			
 
				+        elif scale == 2.0:
			
 
				+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
			
 
				+        else:
			
 
				+            raise NotImplementedError("This scale size:[" + str(scale) +
			
 
				+                                      "] is not implemented!")
			
 
				+
			
 
				+        self._out_channels = []
			
 
				+        self._feature_idx = 0
			
 
				+        # 1. conv1
			
 
				+        self._conv1 = ConvBNLayer(
			
 
				+            in_channels=3,
			
 
				+            out_channels=stage_out_channels[1],
			
 
				+            kernel_size=3,
			
 
				+            stride=2,
			
 
				+            padding=1,
			
 
				+            act=act)
			
 
				+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
			
 
				+        self._feature_idx += 1
			
 
				+
			
 
				+        # 2. bottleneck sequences
			
 
				+        self._block_list = []
			
 
				+        for stage_id, num_repeat in enumerate(stage_repeats):
			
 
				+            for i in range(num_repeat):
			
 
				+                if i == 0:
			
 
				+                    block = self.add_sublayer(
			
 
				+                        name=str(stage_id + 2) + '_' + str(i + 1),
			
 
				+                        sublayer=InvertedResidualDS(
			
 
				+                            in_channels=stage_out_channels[stage_id + 1],
			
 
				+                            out_channels=stage_out_channels[stage_id + 2],
			
 
				+                            stride=2,
			
 
				+                            act=act))
			
 
				+                else:
			
 
				+                    block = self.add_sublayer(
			
 
				+                        name=str(stage_id + 2) + '_' + str(i + 1),
			
 
				+                        sublayer=InvertedResidual(
			
 
				+                            in_channels=stage_out_channels[stage_id + 2],
			
 
				+                            out_channels=stage_out_channels[stage_id + 2],
			
 
				+                            stride=1,
			
 
				+                            act=act))
			
 
				+                self._block_list.append(block)
			
 
				+                self._feature_idx += 1
			
 
				+                self._update_out_channels(stage_out_channels[stage_id + 2],
			
 
				+                                          self._feature_idx, self.feature_maps)
			
 
				+
			
 
				+    def _update_out_channels(self, channel, feature_idx, feature_maps):
			
 
				+        if feature_idx in feature_maps:
			
 
				+            self._out_channels.append(channel)
			
 
				+
			
 
				+    def forward(self, inputs):
			
 
				+        y = self._conv1(inputs['image'])
			
 
				+        y = self._max_pool(y)
			
 
				+        outs = []
			
 
				+        for i, inv in enumerate(self._block_list):
			
 
				+            y = inv(y)
			
 
				+            if i + 2 in self.feature_maps:
			
 
				+                outs.append(y)
			
 
				+
			
 
				+        return outs
			
 
				+
			
 
				+    @property
			
 
				+    def out_shape(self):
			
 
				+        return [ShapeSpec(channels=c) for c in self._out_channels]