Browse Source

[Feature] Init add clas task (need to fix)

geoyee 3 years ago
parent
commit
14863747a7
100 changed files with 24853 additions and 13 deletions
  1. +1 -0  paddlers/datasets/__init__.py
  2. +90 -0  paddlers/datasets/clas_dataset.py
  3. +24 -13  paddlers/models/ppcls/__init__.py
  4. +134 -0  paddlers/models/ppcls/arch/__init__.py
  5. +82 -0  paddlers/models/ppcls/arch/backbone/__init__.py
  6. +0 -0  paddlers/models/ppcls/arch/backbone/base/__init__.py
  7. +301 -0  paddlers/models/ppcls/arch/backbone/base/theseus_layer.py
  8. +6 -0  paddlers/models/ppcls/arch/backbone/legendary_models/__init__.py
  9. +369 -0  paddlers/models/ppcls/arch/backbone/legendary_models/esnet.py
  10. +794 -0  paddlers/models/ppcls/arch/backbone/legendary_models/hrnet.py
  11. +557 -0  paddlers/models/ppcls/arch/backbone/legendary_models/inception_v3.py
  12. +257 -0  paddlers/models/ppcls/arch/backbone/legendary_models/mobilenet_v1.py
  13. +586 -0  paddlers/models/ppcls/arch/backbone/legendary_models/mobilenet_v3.py
  14. +419 -0  paddlers/models/ppcls/arch/backbone/legendary_models/pp_lcnet.py
  15. +591 -0  paddlers/models/ppcls/arch/backbone/legendary_models/resnet.py
  16. +259 -0  paddlers/models/ppcls/arch/backbone/legendary_models/vgg.py
  17. +0 -0  paddlers/models/ppcls/arch/backbone/model_zoo/__init__.py
  18. +168 -0  paddlers/models/ppcls/arch/backbone/model_zoo/alexnet.py
  19. +376 -0  paddlers/models/ppcls/arch/backbone/model_zoo/cspnet.py
  20. +197 -0  paddlers/models/ppcls/arch/backbone/model_zoo/darknet.py
  21. +344 -0  paddlers/models/ppcls/arch/backbone/model_zoo/densenet.py
  22. +272 -0  paddlers/models/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
  23. +528 -0  paddlers/models/ppcls/arch/backbone/model_zoo/dla.py
  24. +451 -0  paddlers/models/ppcls/arch/backbone/model_zoo/dpn.py
  25. +976 -0  paddlers/models/ppcls/arch/backbone/model_zoo/efficientnet.py
  26. +363 -0  paddlers/models/ppcls/arch/backbone/model_zoo/ghostnet.py
  27. +229 -0  paddlers/models/ppcls/arch/backbone/model_zoo/googlenet.py
  28. +693 -0  paddlers/models/ppcls/arch/backbone/model_zoo/gvt.py
  29. +293 -0  paddlers/models/ppcls/arch/backbone/model_zoo/hardnet.py
  30. +477 -0  paddlers/models/ppcls/arch/backbone/model_zoo/inception_v4.py
  31. +589 -0  paddlers/models/ppcls/arch/backbone/model_zoo/levit.py
  32. +815 -0  paddlers/models/ppcls/arch/backbone/model_zoo/mixnet.py
  33. +287 -0  paddlers/models/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
  34. +492 -0  paddlers/models/ppcls/arch/backbone/model_zoo/pvt_v2.py
  35. +203 -0  paddlers/models/ppcls/arch/backbone/model_zoo/rednet.py
  36. +431 -0  paddlers/models/ppcls/arch/backbone/model_zoo/regnet.py
  37. +422 -0  paddlers/models/ppcls/arch/backbone/model_zoo/repvgg.py
  38. +264 -0  paddlers/models/ppcls/arch/backbone/model_zoo/res2net.py
  39. +305 -0  paddlers/models/ppcls/arch/backbone/model_zoo/res2net_vd.py
  40. +740 -0  paddlers/models/ppcls/arch/backbone/model_zoo/resnest.py
  41. +309 -0  paddlers/models/ppcls/arch/backbone/model_zoo/resnet_vc.py
  42. +298 -0  paddlers/models/ppcls/arch/backbone/model_zoo/resnext.py
  43. +490 -0  paddlers/models/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
  44. +317 -0  paddlers/models/ppcls/arch/backbone/model_zoo/resnext_vd.py
  45. +281 -0  paddlers/models/ppcls/arch/backbone/model_zoo/rexnet.py
  46. +390 -0  paddlers/models/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
  47. +364 -0  paddlers/models/ppcls/arch/backbone/model_zoo/se_resnext.py
  48. +309 -0  paddlers/models/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
  49. +362 -0  paddlers/models/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
  50. +194 -0  paddlers/models/ppcls/arch/backbone/model_zoo/squeezenet.py
  51. +857 -0  paddlers/models/ppcls/arch/backbone/model_zoo/swin_transformer.py
  52. +385 -0  paddlers/models/ppcls/arch/backbone/model_zoo/tnt.py
  53. +458 -0  paddlers/models/ppcls/arch/backbone/model_zoo/vision_transformer.py
  54. +377 -0  paddlers/models/ppcls/arch/backbone/model_zoo/xception.py
  55. +421 -0  paddlers/models/ppcls/arch/backbone/model_zoo/xception_deeplab.py
  56. +3 -0  paddlers/models/ppcls/arch/backbone/variant_models/__init__.py
  57. +29 -0  paddlers/models/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py
  58. +23 -0  paddlers/models/ppcls/arch/backbone/variant_models/resnet_variant.py
  59. +28 -0  paddlers/models/ppcls/arch/backbone/variant_models/vgg_variant.py
  60. +32 -0  paddlers/models/ppcls/arch/gears/__init__.py
  61. +72 -0  paddlers/models/ppcls/arch/gears/arcmargin.py
  62. +59 -0  paddlers/models/ppcls/arch/gears/circlemargin.py
  63. +55 -0  paddlers/models/ppcls/arch/gears/cosmargin.py
  64. +35 -0  paddlers/models/ppcls/arch/gears/fc.py
  65. +9 -0  paddlers/models/ppcls/arch/gears/identity_head.py
  66. +52 -0  paddlers/models/ppcls/arch/gears/vehicle_neck.py
  67. +16 -0  paddlers/models/ppcls/arch/slim/__init__.py
  68. +65 -0  paddlers/models/ppcls/arch/slim/prune.py
  69. +55 -0  paddlers/models/ppcls/arch/slim/quant.py
  70. +53 -0  paddlers/models/ppcls/arch/utils.py
  71. +149 -0  paddlers/models/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml
  72. +148 -0  paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml
  73. +145 -0  paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_binary.yaml
  74. +188 -0  paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml
  75. +193 -0  paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_udml.yaml
  76. +129 -0  paddlers/models/ppcls/configs/ImageNet/AlexNet/AlexNet.yaml
  77. +131 -0  paddlers/models/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml
  78. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA102.yaml
  79. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA102x.yaml
  80. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA102x2.yaml
  81. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA169.yaml
  82. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA34.yaml
  83. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA46_c.yaml
  84. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA46x_c.yaml
  85. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA60.yaml
  86. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA60x.yaml
  87. +130 -0  paddlers/models/ppcls/configs/ImageNet/DLA/DLA60x_c.yaml
  88. +130 -0  paddlers/models/ppcls/configs/ImageNet/DPN/DPN107.yaml
  89. +130 -0  paddlers/models/ppcls/configs/ImageNet/DPN/DPN131.yaml
  90. +130 -0  paddlers/models/ppcls/configs/ImageNet/DPN/DPN68.yaml
  91. +130 -0  paddlers/models/ppcls/configs/ImageNet/DPN/DPN92.yaml
  92. +130 -0  paddlers/models/ppcls/configs/ImageNet/DPN/DPN98.yaml
  93. +130 -0  paddlers/models/ppcls/configs/ImageNet/DarkNet/DarkNet53.yaml
  94. +129 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_AutoAugment.yaml
  95. +128 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Baseline.yaml
  96. +128 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutmix.yaml
  97. +131 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutout.yaml
  98. +134 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_GridMask.yaml
  99. +129 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_HideAndSeek.yaml
  100. +128 -0  paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Mixup.yaml

+ 1 - 0
paddlers/datasets/__init__.py

@@ -15,4 +15,5 @@
 from .voc import VOCDetection
 from .seg_dataset import SegDataset
 from .cd_dataset import CDDataset
+from .clas_dataset import ClasDataset
 from .raster import Raster

+ 90 - 0
paddlers/datasets/clas_dataset.py

@@ -0,0 +1,90 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path as osp
+import copy
+
+from paddle.io import Dataset
+from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
+
+
+class ClasDataset(Dataset):
+    """读取图像分类任务数据集,并对样本进行相应的处理。
+
+    Args:
+        data_dir (str): 数据集所在的目录路径。
+        file_list (str): 描述数据集图片文件和对应标注序号(文本内每行路径为相对data_dir的相对路)。
+        label_list (str): 描述数据集包含的类别信息文件路径。默认值为None。
+        transforms (paddlers.transforms): 数据集中每个样本的预处理/增强算子。
+        num_workers (int|str): 数据集中样本在预处理过程中的线程或进程数。默认为'auto'。
+        shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。
+    """
+
+    def __init__(self,
+                 data_dir,
+                 file_list,
+                 label_list=None,
+                 transforms=None,
+                 num_workers='auto',
+                 shuffle=False):
+        super(ClasDataset, self).__init__()
+        self.transforms = copy.deepcopy(transforms)
+        # TODO batch padding
+        self.batch_transforms = None
+        self.num_workers = get_num_workers(num_workers)
+        self.shuffle = shuffle
+        self.file_list = list()
+        self.labels = list()
+
+        # TODO: when label_list is not None, point the user to dataset analysis to generate it;
+        # do not parse the label file here
+        if label_list is not None:
+            with open(label_list, encoding=get_encoding(label_list)) as f:
+                for line in f:
+                    item = line.strip()
+                    self.labels.append(item)
+        with open(file_list, encoding=get_encoding(file_list)) as f:
+            for line in f:
+                items = line.strip().split()
+                if len(items) != 2:
+                    raise Exception(
+                        "A space is defined as the delimiter to separate the image path and the label, " \
+                        "so neither the image path nor the label may contain a space, but line[{}] of " \
+                        "file_list[{}] does not split into exactly two fields.".format(line, file_list))
+                items[0] = path_normalization(items[0])
+                if not is_pic(items[0]):
+                    continue
+                full_path_im = osp.join(data_dir, items[0])
+                label = items[1]
+                if not osp.exists(full_path_im):
+                    raise IOError('Image file {} does not exist!'.format(
+                        full_path_im))
+                if not label.isdigit():
+                    raise ValueError(
+                        'Label {} cannot be converted to an integer!'.format(label))
+                self.file_list.append({
+                    'image': full_path_im,
+                    'label': int(label)
+                })
+        self.num_samples = len(self.file_list)
+        logging.info("{} samples in file {}".format(
+            len(self.file_list), file_list))
+
+    def __getitem__(self, idx):
+        sample = copy.deepcopy(self.file_list[idx])
+        outputs = self.transforms(sample)
+        return outputs
+
+    def __len__(self):
+        return len(self.file_list)
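
A minimal usage sketch for the new dataset class (the directory layout and file names below are illustrative assumptions, not part of this commit; it also assumes paddlers.transforms exposes Compose and Resize, as elsewhere in PaddleRS):

    import paddlers.transforms as T
    from paddlers.datasets import ClasDataset

    # train_list.txt: each line is "<path relative to data_dir> <label index>",
    # separated by a single space; labels.txt lists one class name per line.
    dataset = ClasDataset(
        data_dir='dataset',
        file_list='dataset/train_list.txt',
        label_list='dataset/labels.txt',
        transforms=T.Compose([T.Resize(target_size=224)]),
        num_workers='auto',
        shuffle=True)
    print(len(dataset))  # number of valid samples parsed from train_list.txt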

+ 24 - 13
paddlers/models/ppcls/__init__.py

@@ -1,13 +1,24 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO: add ppcls module
+import sys
+sys.path.append("paddlers/models")
+
+from . import optimizer
+
+from .arch import *
+from .optimizer import *
+from .data import *
+from .utils import *
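
A hedged note on the sys.path hack above: the appended path is relative, so it only resolves when the interpreter runs from the repository root. Under that assumption, the vendored package also becomes importable under the bare name "ppcls", which the absolute imports in the files below depend on:

    import paddlers.models.ppcls  # runs the sys.path.append above
    import ppcls                  # now resolves to the vendored copy (cwd must be the repo root)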

+ 134 - 0
paddlers/models/ppcls/arch/__init__.py

@@ -0,0 +1,134 @@
+#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import copy
+import importlib
+
+import paddle.nn as nn
+from paddle.jit import to_static
+from paddle.static import InputSpec
+
+from . import backbone, gears
+from .backbone import *
+from .gears import build_gear
+from .utils import *
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils import logger
+from ppcls.utils.save_load import load_dygraph_pretrain
+from ppcls.arch.slim import prune_model, quantize_model
+
+__all__ = ["build_model", "RecModel", "DistillationModel"]
+
+
+def build_model(config):
+    arch_config = copy.deepcopy(config["Arch"])
+    model_type = arch_config.pop("name")
+    mod = importlib.import_module(__name__)
+    arch = getattr(mod, model_type)(**arch_config)
+    if isinstance(arch, TheseusLayer):
+        prune_model(config, arch)
+        quantize_model(config, arch)
+    return arch
+
+
+def apply_to_static(config, model):
+    support_to_static = config['Global'].get('to_static', False)
+
+    if support_to_static:
+        specs = None
+        if 'image_shape' in config['Global']:
+            specs = [InputSpec([None] + config['Global']['image_shape'])]
+        model = to_static(model, input_spec=specs)
+        logger.info("Successfully to apply @to_static with specs: {}".format(
+            specs))
+    return model
+
+
+class RecModel(TheseusLayer):
+    def __init__(self, **config):
+        super().__init__()
+        backbone_config = config["Backbone"]
+        backbone_name = backbone_config.pop("name")
+        self.backbone = eval(backbone_name)(**backbone_config)
+        if "BackboneStopLayer" in config:
+            backbone_stop_layer = config["BackboneStopLayer"]["name"]
+            self.backbone.stop_after(backbone_stop_layer)
+
+        if "Neck" in config:
+            self.neck = build_gear(config["Neck"])
+        else:
+            self.neck = None
+
+        if "Head" in config:
+            self.head = build_gear(config["Head"])
+        else:
+            self.head = None
+
+    def forward(self, x, label=None):
+        out = dict()
+        x = self.backbone(x)
+        out["backbone"] = x
+        if self.neck is not None:
+            x = self.neck(x)
+            out["neck"] = x
+        out["features"] = x
+        if self.head is not None:
+            y = self.head(x, label)
+            out["logits"] = y
+        return out
+
+
+class DistillationModel(nn.Layer):
+    def __init__(self,
+                 models=None,
+                 pretrained_list=None,
+                 freeze_params_list=None,
+                 **kargs):
+        super().__init__()
+        assert isinstance(models, list)
+        self.model_list = []
+        self.model_name_list = []
+        if pretrained_list is not None:
+            assert len(pretrained_list) == len(models)
+
+        if freeze_params_list is None:
+            freeze_params_list = [False] * len(models)
+        assert len(freeze_params_list) == len(models)
+        for idx, model_config in enumerate(models):
+            assert len(model_config) == 1
+            key = list(model_config.keys())[0]
+            model_config = model_config[key]
+            model_name = model_config.pop("name")
+            model = eval(model_name)(**model_config)
+
+            if freeze_params_list[idx]:
+                for param in model.parameters():
+                    param.trainable = False
+            self.model_list.append(self.add_sublayer(key, model))
+            self.model_name_list.append(key)
+
+        if pretrained_list is not None:
+            for idx, pretrained in enumerate(pretrained_list):
+                if pretrained is not None:
+                    load_dygraph_pretrain(
+                        self.model_name_list[idx], path=pretrained)
+
+    def forward(self, x, label=None):
+        result_dict = dict()
+        for idx, model_name in enumerate(self.model_name_list):
+            if label is None:
+                result_dict[model_name] = self.model_list[idx](x)
+            else:
+                result_dict[model_name] = self.model_list[idx](x, label)
+        return result_dict
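
A hedged sketch of how build_model consumes an "Arch" config (the backbone, stop-layer, and gear names below are illustrative; whether they resolve depends on the backbone and gear registries imported above):

    from ppcls.arch import build_model

    config = {
        "Arch": {
            "name": "RecModel",
            "Backbone": {"name": "ResNet50", "pretrained": False},
            "BackboneStopLayer": {"name": "avg_pool"},  # truncate the backbone here
            "Neck": {"name": "FC", "embedding_size": 2048, "class_num": 512},
        }
    }
    model = build_model(config)  # RecModel: backbone -> neck, no head configured
    # Its forward returns a dict with "backbone", "neck", and "features" entries
    # (plus "logits" when a "Head" gear is configured).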

+ 82 - 0
paddlers/models/ppcls/arch/backbone/__init__.py

@@ -0,0 +1,82 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import inspect
+
+from ppcls.arch.backbone.legendary_models.mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
+from ppcls.arch.backbone.legendary_models.mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
+from ppcls.arch.backbone.legendary_models.resnet import ResNet18, ResNet18_vd, ResNet34, ResNet34_vd, ResNet50, ResNet50_vd, ResNet101, ResNet101_vd, ResNet152, ResNet152_vd, ResNet200_vd
+from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
+from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
+from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
+from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x0_25, PPLCNet_x0_35, PPLCNet_x0_5, PPLCNet_x0_75, PPLCNet_x1_0, PPLCNet_x1_5, PPLCNet_x2_0, PPLCNet_x2_5
+from ppcls.arch.backbone.legendary_models.esnet import ESNet_x0_25, ESNet_x0_5, ESNet_x0_75, ESNet_x1_0
+
+from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
+from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
+from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
+from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
+from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
+from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
+from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SENet154_vd
+from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
+from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
+from ppcls.arch.backbone.model_zoo.densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264
+from ppcls.arch.backbone.model_zoo.efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, EfficientNetB0_small
+from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeSt50, ResNeSt101
+from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet
+from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
+from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
+from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
+from ppcls.arch.backbone.model_zoo.alexnet import AlexNet
+from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4
+from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71
+from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab
+from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl
+from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
+from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
+from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
+from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
+from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
+from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
+from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
+from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
+from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large
+from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384
+from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
+from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
+from ppcls.arch.backbone.model_zoo.tnt import TNT_small
+from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
+from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
+from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5
+from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
+from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
+from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh
+
+
+# Help the whl package collect every model API (class type) and component API (function type).
+def get_apis():
+    current_func = sys._getframe().f_code.co_name
+    current_module = sys.modules[__name__]
+    api = []
+    for _, obj in inspect.getmembers(current_module,
+                                     inspect.isclass) + inspect.getmembers(
+                                         current_module, inspect.isfunction):
+        api.append(obj.__name__)
+    api.remove(current_func)
+    return api
+
+
+__all__ = get_apis()
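
An illustrative check of what get_apis() collects (membership depends on the imports above):

    from ppcls.arch import backbone

    print("ResNet50" in backbone.__all__)  # True: imported entry points are exported
    print("get_apis" in backbone.__all__)  # False: the collector removes its own name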

+ 0 - 0
paddlers/models/ppcls/arch/backbone/base/__init__.py


+ 301 - 0
paddlers/models/ppcls/arch/backbone/base/theseus_layer.py

@@ -0,0 +1,301 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Tuple, List, Dict, Union, Callable, Any
+
+from paddle import nn
+from ppcls.utils import logger
+
+
+class Identity(nn.Layer):
+    def __init__(self):
+        super(Identity, self).__init__()
+
+    def forward(self, inputs):
+        return inputs
+
+
+class TheseusLayer(nn.Layer):
+    def __init__(self, *args, **kwargs):
+        super(TheseusLayer, self).__init__()
+        self.res_dict = {}
+        self.res_name = self.full_name()
+        self.pruner = None
+        self.quanter = None
+
+    def _return_dict_hook(self, layer, input, output):
+        res_dict = {"output": output}
+        # 'list' is needed to avoid error raised by popping self.res_dict
+        for res_key in list(self.res_dict):
+            # clear the res_dict because the forward process may change according to input
+            res_dict[res_key] = self.res_dict.pop(res_key)
+        return res_dict
+
+    def init_res(self,
+                 stages_pattern,
+                 return_patterns=None,
+                 return_stages=None):
+        if return_patterns and return_stages:
+            msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
+            logger.warning(msg)
+            return_stages = None
+
+        if return_stages is True:
+            return_patterns = stages_pattern
+        # return_stages is int or bool
+        if type(return_stages) is int:
+            return_stages = [return_stages]
+        if isinstance(return_stages, list):
+            if max(return_stages) > len(stages_pattern) or min(
+                    return_stages) < 0:
+                msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
+                logger.warning(msg)
+                return_stages = [
+                    val for val in return_stages
+                    if val >= 0 and val < len(stages_pattern)
+                ]
+            return_patterns = [stages_pattern[i] for i in return_stages]
+
+        if return_patterns:
+            self.update_res(return_patterns)
+
+    def replace_sub(self, *args, **kwargs) -> None:
+        msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead."
+        logger.error(DeprecationWarning(msg))
+        raise DeprecationWarning(msg)
+
+    def upgrade_sublayer(self,
+                         layer_name_pattern: Union[str, List[str]],
+                         handle_func: Callable[[nn.Layer, str], nn.Layer]
+                         ) -> List[str]:
+        """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'.
+
+        Args:
+            layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'.
+            handle_func (Callable[[nn.Layer, str], nn.Layer]): The function used to modify the target layer(s) specified by 'layer_name_pattern'. Its parameters are the layer (nn.Layer) to modify and the pattern (str) that matched it (a member of 'layer_name_pattern' when that argument is a list); it returns the processed layer.
+
+        Returns:
+            List[str]: The pattern(s) in 'layer_name_pattern' that matched a sub-layer and were processed by 'handle_func()'.
+
+        Examples:
+
+            from paddle import nn
+            import paddleclas
+
+            def rep_func(layer: nn.Layer, pattern: str):
+                new_layer = nn.Conv2D(
+                    in_channels=layer._in_channels,
+                    out_channels=layer._out_channels,
+                    kernel_size=5,
+                    padding=2
+                )
+                return new_layer
+
+            net = paddleclas.MobileNetV1()
+            res = net.upgrade_sublayer(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func)
+            print(res)
+            # ['blocks[11].depthwise_conv.conv', 'blocks[12].depthwise_conv.conv']
+        """
+
+        if not isinstance(layer_name_pattern, list):
+            layer_name_pattern = [layer_name_pattern]
+
+        hit_layer_pattern_list = []
+        for pattern in layer_name_pattern:
+            # parse pattern to find target layer and its parent
+            layer_list = parse_pattern_str(pattern=pattern, parent_layer=self)
+            if not layer_list:
+                continue
+            sub_layer_parent = layer_list[-2]["layer"] if len(
+                layer_list) > 1 else self
+
+            sub_layer = layer_list[-1]["layer"]
+            sub_layer_name = layer_list[-1]["name"]
+            sub_layer_index = layer_list[-1]["index"]
+
+            new_sub_layer = handle_func(sub_layer, pattern)
+
+            if sub_layer_index:
+                getattr(sub_layer_parent,
+                        sub_layer_name)[sub_layer_index] = new_sub_layer
+            else:
+                setattr(sub_layer_parent, sub_layer_name, new_sub_layer)
+
+            hit_layer_pattern_list.append(pattern)
+        return hit_layer_pattern_list
+
+    def stop_after(self, stop_layer_name: str) -> bool:
+        """stop forward and backward after 'stop_layer_name'.
+
+        Args:
+            stop_layer_name (str): The name of the layer after which forward and backward computation stops.
+
+        Returns:
+            bool: 'True' if successful, 'False' otherwise.
+        """
+
+        layer_list = parse_pattern_str(stop_layer_name, self)
+        if not layer_list:
+            return False
+
+        parent_layer = self
+        for layer_dict in layer_list:
+            name, index = layer_dict["name"], layer_dict["index"]
+            if not set_identity(parent_layer, name, index):
+                msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'."
+                logger.warning(msg)
+                return False
+            parent_layer = layer_dict["layer"]
+
+        return True
+
+    def update_res(
+            self,
+            return_patterns: Union[str, List[str]]) -> List[str]:
+        """update the result(s) to be returned.
+
+        Args:
+            return_patterns (Union[str, List[str]]): The name of layer to return output.
+
+        Returns:
+            List[str]: The pattern(s) that were set successfully.
+        """
+
+        # clear res_dict that could have been set
+        self.res_dict = {}
+
+        class Handler(object):
+            def __init__(self, res_dict):
+                # res_dict is a reference
+                self.res_dict = res_dict
+
+            def __call__(self, layer, pattern):
+                layer.res_dict = self.res_dict
+                layer.res_name = pattern
+                if hasattr(layer, "hook_remove_helper"):
+                    layer.hook_remove_helper.remove()
+                layer.hook_remove_helper = layer.register_forward_post_hook(
+                    save_sub_res_hook)
+                return layer
+
+        handle_func = Handler(self.res_dict)
+
+        hit_layer_pattern_list = self.upgrade_sublayer(
+            return_patterns, handle_func=handle_func)
+
+        if hasattr(self, "hook_remove_helper"):
+            self.hook_remove_helper.remove()
+        self.hook_remove_helper = self.register_forward_post_hook(
+            self._return_dict_hook)
+
+        return hit_layer_pattern_list
+
+
+def save_sub_res_hook(layer, input, output):
+    layer.res_dict[layer.res_name] = output
+
+
+def set_identity(parent_layer: nn.Layer,
+                 layer_name: str,
+                 layer_index: str=None) -> bool:
+    """set the layer specified by layer_name and layer_index to Indentity.
+
+    Args:
+        parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index.
+        layer_name (str): The name of target layer to be set to Indentity.
+        layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None.
+
+    Returns:
+        bool: True if successfully, False otherwise.
+    """
+
+    stop_after = False
+    for sub_layer_name in parent_layer._sub_layers:
+        if stop_after:
+            parent_layer._sub_layers[sub_layer_name] = Identity()
+            continue
+        if sub_layer_name == layer_name:
+            stop_after = True
+
+    if layer_index and stop_after:
+        stop_after = False
+        for sub_layer_index in parent_layer._sub_layers[
+                layer_name]._sub_layers:
+            if stop_after:
+                parent_layer._sub_layers[layer_name][
+                    sub_layer_index] = Identity()
+                continue
+            if layer_index == sub_layer_index:
+                stop_after = True
+
+    return stop_after
+
+
+def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[
+        None, List[Dict[str, Union[nn.Layer, str, None]]]]:
+    """parse the string type pattern.
+
+    Args:
+        pattern (str): The pattern describing the layer.
+        parent_layer (nn.Layer): The root layer relative to the pattern.
+
+    Returns:
+        Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order:
+                                                                [
+                                                                    {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist},
+                                                                    {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist},
+                                                                    ...
+                                                                ]
+    """
+
+    pattern_list = pattern.split(".")
+    if not pattern_list:
+        msg = f"The pattern('{pattern}') is illegal. Please check and retry."
+        logger.warning(msg)
+        return None
+
+    layer_list = []
+    while len(pattern_list) > 0:
+        if '[' in pattern_list[0]:
+            target_layer_name = pattern_list[0].split('[')[0]
+            target_layer_index = pattern_list[0].split('[')[1].split(']')[0]
+        else:
+            target_layer_name = pattern_list[0]
+            target_layer_index = None
+
+        target_layer = getattr(parent_layer, target_layer_name, None)
+
+        if target_layer is None:
+            msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')."
+            logger.warning(msg)
+            return None
+
+        if target_layer_index and target_layer:
+            if int(target_layer_index) < 0 or int(target_layer_index) >= len(
+                    target_layer):
+                msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0."
+                logger.warning(msg)
+                return None
+
+            target_layer = target_layer[target_layer_index]
+
+        layer_list.append({
+            "layer": target_layer,
+            "name": target_layer_name,
+            "index": target_layer_index
+        })
+
+        pattern_list = pattern_list[1:]
+        parent_layer = target_layer
+    return layer_list
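
A hedged sketch of the two main TheseusLayer entry points above, update_res and stop_after (the backbone and patterns are illustrative; they assume a legendary-model backbone such as ResNet50 whose stages live in an attribute named "blocks"):

    import paddle
    from ppcls.arch.backbone.legendary_models.resnet import ResNet50

    net = ResNet50()
    net.update_res(["blocks[2]"])  # hook the output of blocks[2]
    out = net(paddle.rand([1, 3, 224, 224]))
    # out is now a dict: {"output": <logits>, "blocks[2]": <intermediate feature>}

    net2 = ResNet50()
    net2.stop_after("blocks[2]")   # every layer after blocks[2] becomes Identity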

+ 6 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/__init__.py

@@ -0,0 +1,6 @@
+from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd
+from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W64_C
+from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
+from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
+from .inception_v3 import InceptionV3
+from .vgg import VGG11, VGG13, VGG16, VGG19

+ 369 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/esnet.py

@@ -0,0 +1,369 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+import math
+import paddle
+from paddle import ParamAttr, reshape, transpose, concat, split
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D
+from paddle.nn.initializer import KaimingNormal
+from paddle.regularizer import L2Decay
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ESNet_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_25_pretrained.pdparams",
+    "ESNet_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_5_pretrained.pdparams",
+    "ESNet_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_75_pretrained.pdparams",
+    "ESNet_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x1_0_pretrained.pdparams",
+}
+
+MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def channel_shuffle(x, groups):
+    batch_size, num_channels, height, width = x.shape[0:4]
+    channels_per_group = num_channels // groups
+    x = reshape(
+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
+    x = transpose(x=x, perm=[0, 2, 1, 3, 4])
+    x = reshape(x=x, shape=[batch_size, num_channels, height, width])
+    return x
+
+
+def make_divisible(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 if_act=True):
+        super().__init__()
+        self.conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(initializer=KaimingNormal()),
+            bias_attr=False)
+
+        self.bn = BatchNorm(
+            out_channels,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.if_act = if_act
+        self.hardswish = nn.Hardswish()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.if_act:
+            x = self.hardswish(x)
+        return x
+
+
+class SEModule(TheseusLayer):
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
+            in_channels=channel,
+            out_channels=channel // reduction,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.relu = nn.ReLU()
+        self.conv2 = Conv2D(
+            in_channels=channel // reduction,
+            out_channels=channel,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.hardsigmoid = nn.Hardsigmoid()
+
+    def forward(self, x):
+        identity = x
+        x = self.avg_pool(x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.hardsigmoid(x)
+        x = paddle.multiply(x=identity, y=x)
+        return x
+
+
+class ESBlock1(TheseusLayer):
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.pw_1_1 = ConvBNLayer(
+            in_channels=in_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1)
+        self.dw_1 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=1,
+            groups=out_channels // 2,
+            if_act=False)
+        self.se = SEModule(out_channels)
+
+        self.pw_1_2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1)
+
+    def forward(self, x):
+        x1, x2 = split(
+            x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1)
+        x2 = self.pw_1_1(x2)
+        x3 = self.dw_1(x2)
+        x3 = concat([x2, x3], axis=1)
+        x3 = self.se(x3)
+        x3 = self.pw_1_2(x3)
+        x = concat([x1, x3], axis=1)
+        return channel_shuffle(x, 2)
+
+
+class ESBlock2(TheseusLayer):
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+
+        # branch1
+        self.dw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=2,
+            groups=in_channels,
+            if_act=False)
+        self.pw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1)
+        # branch2
+        self.pw_2_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1)
+        self.dw_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=2,
+            groups=out_channels // 2,
+            if_act=False)
+        self.se = SEModule(out_channels // 2)
+        self.pw_2_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1)
+        self.concat_dw = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            groups=out_channels)
+        self.concat_pw = ConvBNLayer(
+            in_channels=out_channels, out_channels=out_channels, kernel_size=1)
+
+    def forward(self, x):
+        x1 = self.dw_1(x)
+        x1 = self.pw_1(x1)
+        x2 = self.pw_2_1(x)
+        x2 = self.dw_2(x2)
+        x2 = self.se(x2)
+        x2 = self.pw_2_2(x2)
+        x = concat([x1, x2], axis=1)
+        x = self.concat_dw(x)
+        x = self.concat_pw(x)
+        return x
+
+
+class ESNet(TheseusLayer):
+    def __init__(self,
+                 stages_pattern,
+                 class_num=1000,
+                 scale=1.0,
+                 dropout_prob=0.2,
+                 class_expand=1280,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+        self.scale = scale
+        self.class_num = class_num
+        self.class_expand = class_expand
+        stage_repeats = [3, 7, 3]
+        stage_out_channels = [
+            -1, 24, make_divisible(116 * scale), make_divisible(232 * scale),
+            make_divisible(464 * scale), 1024
+        ]
+
+        self.conv1 = ConvBNLayer(
+            in_channels=3,
+            out_channels=stage_out_channels[1],
+            kernel_size=3,
+            stride=2)
+        self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        block_list = []
+        for stage_id, num_repeat in enumerate(stage_repeats):
+            for i in range(num_repeat):
+                if i == 0:
+                    block = ESBlock2(
+                        in_channels=stage_out_channels[stage_id + 1],
+                        out_channels=stage_out_channels[stage_id + 2])
+                else:
+                    block = ESBlock1(
+                        in_channels=stage_out_channels[stage_id + 2],
+                        out_channels=stage_out_channels[stage_id + 2])
+                block_list.append(block)
+        self.blocks = nn.Sequential(*block_list)
+
+        self.conv2 = ConvBNLayer(
+            in_channels=stage_out_channels[-2],
+            out_channels=stage_out_channels[-1],
+            kernel_size=1)
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+
+        self.last_conv = Conv2D(
+            in_channels=stage_out_channels[-1],
+            out_channels=self.class_expand,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias_attr=False)
+        self.hardswish = nn.Hardswish()
+        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
+        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
+        self.fc = Linear(self.class_expand, self.class_num)
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.max_pool(x)
+        x = self.blocks(x)
+        x = self.conv2(x)
+        x = self.avg_pool(x)
+        x = self.last_conv(x)
+        x = self.hardswish(x)
+        x = self.dropout(x)
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def ESNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ESNet_x0_25
+    Args:
+        pretrained: bool=False or str. If `True`, load pretrained parameters;
+                    if a str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ESNet_x0_25` model depends on args.
+    """
+    model = ESNet(
+        scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_25"], use_ssld)
+    return model
+
+
+def ESNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ESNet_x0_5
+    Args:
+        pretrained: bool=False or str. If `True`, load pretrained parameters;
+                    if a str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ESNet_x0_5` model depends on args.
+    """
+    model = ESNet(
+        scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_5"], use_ssld)
+    return model
+
+
+def ESNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ESNet_x0_75
+    Args:
+        pretrained: bool=False or str. If `True`, load pretrained parameters;
+                    if a str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ESNet_x0_75` model depends on args.
+    """
+    model = ESNet(
+        scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_75"], use_ssld)
+    return model
+
+
+def ESNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ESNet_x1_0
+    Args:
+        pretrained: bool=False or str. If `True`, load pretrained parameters;
+                    if a str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ESNet_x1_0` model depends on args.
+    """
+    model = ESNet(
+        scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x1_0"], use_ssld)
+    return model
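
A minimal forward-pass sketch for the entry points above (assumes a working PaddlePaddle install; pretrained=False avoids downloading weights):

    import paddle

    model = ESNet_x0_25(pretrained=False, class_num=1000)
    x = paddle.rand([1, 3, 224, 224])
    logits = model(x)
    print(logits.shape)  # [1, 1000]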

+ 794 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/hrnet.py

@@ -0,0 +1,794 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+from paddle import ParamAttr
+from paddle.nn.functional import upsample
+from paddle.nn.initializer import Uniform
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer, Identity
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "HRNet_W18_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W18_C_pretrained.pdparams",
+    "HRNet_W30_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W30_C_pretrained.pdparams",
+    "HRNet_W32_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W32_C_pretrained.pdparams",
+    "HRNet_W40_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W40_C_pretrained.pdparams",
+    "HRNet_W44_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W44_C_pretrained.pdparams",
+    "HRNet_W48_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W48_C_pretrained.pdparams",
+    "HRNet_W64_C":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W64_C_pretrained.pdparams"
+}
+
+MODEL_STAGES_PATTERN = {"HRNet": ["st4"]}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def _create_act(act):
+    if act == "hardswish":
+        return nn.Hardswish()
+    elif act == "relu":
+        return nn.ReLU()
+    elif act is None:
+        return Identity()
+    else:
+        raise RuntimeError(
+            "The activation function is not supported: {}".format(act))
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act="relu"):
+        super().__init__()
+
+        self.conv = nn.Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            bias_attr=False)
+        self.bn = nn.BatchNorm(num_filters, act=None)
+        self.act = _create_act(act)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.act(x)
+        return x
+
+
+class BottleneckBlock(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 has_se,
+                 stride=1,
+                 downsample=False):
+        super().__init__()
+
+        self.has_se = has_se
+        self.downsample = downsample
+
+        self.conv1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act="relu")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act="relu")
+        self.conv3 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None)
+
+        if self.downsample:
+            self.conv_down = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                act=None)
+
+        if self.has_se:
+            self.se = SELayer(
+                num_channels=num_filters * 4,
+                num_filters=num_filters * 4,
+                reduction_ratio=16)
+        self.relu = nn.ReLU()
+
+    def forward(self, x, res_dict=None):
+        residual = x
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        if self.downsample:
+            residual = self.conv_down(residual)
+        if self.has_se:
+            x = self.se(x)
+        x = paddle.add(x=residual, y=x)
+        x = self.relu(x)
+        return x
+
+
+class BasicBlock(nn.Layer):
+    def __init__(self, num_channels, num_filters, has_se=False):
+        super().__init__()
+
+        self.has_se = has_se
+
+        self.conv1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=1,
+            act="relu")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=1,
+            act=None)
+
+        if self.has_se:
+            self.se = SELayer(
+                num_channels=num_filters,
+                num_filters=num_filters,
+                reduction_ratio=16)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        residual = x
+        x = self.conv1(x)
+        x = self.conv2(x)
+
+        if self.has_se:
+            x = self.se(x)
+
+        x = paddle.add(x=residual, y=x)
+        x = self.relu(x)
+        return x
+
+
+class SELayer(TheseusLayer):
+    def __init__(self, num_channels, num_filters, reduction_ratio):
+        super().__init__()
+
+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
+
+        self._num_channels = num_channels
+
+        med_ch = int(num_channels / reduction_ratio)
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        self.fc_squeeze = nn.Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+        self.relu = nn.ReLU()
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.fc_excitation = nn.Linear(
+            med_ch,
+            num_filters,
+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x, res_dict=None):
+        residual = x
+        x = self.avg_pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self.fc_squeeze(x)
+        x = self.relu(x)
+        x = self.fc_excitation(x)
+        x = self.sigmoid(x)
+        x = paddle.unsqueeze(x, axis=[2, 3])
+        x = residual * x
+        return x
+
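+# Shape walk-through for SELayer (illustrative): for x of shape [N, C, H, W],
+# avg_pool + squeeze yield [N, C]; fc_squeeze maps to [N, C // reduction_ratio]
+# and fc_excitation back to [N, num_filters]; unsqueeze restores
+# [N, num_filters, 1, 1], which rescales the input channel-wise by broadcasting.
+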
+
+class Stage(TheseusLayer):
+    def __init__(self, num_modules, num_filters, has_se=False):
+        super().__init__()
+
+        self._num_modules = num_modules
+
+        self.stage_func_list = nn.LayerList()
+        for i in range(num_modules):
+            self.stage_func_list.append(
+                HighResolutionModule(
+                    num_filters=num_filters, has_se=has_se))
+
+    def forward(self, x, res_dict=None):
+        for idx in range(self._num_modules):
+            x = self.stage_func_list[idx](x)
+        return x
+
+
+class HighResolutionModule(TheseusLayer):
+    def __init__(self, num_filters, has_se=False):
+        super().__init__()
+
+        self.basic_block_list = nn.LayerList()
+
+        for i in range(len(num_filters)):
+            self.basic_block_list.append(
+                nn.Sequential(* [
+                    BasicBlock(
+                        num_channels=num_filters[i],
+                        num_filters=num_filters[i],
+                        has_se=has_se) for j in range(4)
+                ]))
+
+        self.fuse_func = FuseLayers(
+            in_channels=num_filters, out_channels=num_filters)
+
+    def forward(self, x, res_dict=None):
+        out = []
+        for idx, xi in enumerate(x):
+            basic_block_list = self.basic_block_list[idx]
+            for basic_block_func in basic_block_list:
+                xi = basic_block_func(xi)
+            out.append(xi)
+        out = self.fuse_func(out)
+        return out
+
+
+class FuseLayers(TheseusLayer):
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+
+        self._actual_ch = len(in_channels)
+        self._in_channels = in_channels
+
+        self.residual_func_list = nn.LayerList()
+        self.relu = nn.ReLU()
+        for i in range(len(in_channels)):
+            for j in range(len(in_channels)):
+                if j > i:
+                    self.residual_func_list.append(
+                        ConvBNLayer(
+                            num_channels=in_channels[j],
+                            num_filters=out_channels[i],
+                            filter_size=1,
+                            stride=1,
+                            act=None))
+                elif j < i:
+                    pre_num_filters = in_channels[j]
+                    for k in range(i - j):
+                        if k == i - j - 1:
+                            self.residual_func_list.append(
+                                ConvBNLayer(
+                                    num_channels=pre_num_filters,
+                                    num_filters=out_channels[i],
+                                    filter_size=3,
+                                    stride=2,
+                                    act=None))
+                            pre_num_filters = out_channels[i]
+                        else:
+                            self.residual_func_list.append(
+                                ConvBNLayer(
+                                    num_channels=pre_num_filters,
+                                    num_filters=out_channels[j],
+                                    filter_size=3,
+                                    stride=2,
+                                    act="relu"))
+                            pre_num_filters = out_channels[j]
+
+    def forward(self, x, res_dict=None):
+        out = []
+        residual_func_idx = 0
+        for i in range(len(self._in_channels)):
+            residual = x[i]
+            for j in range(len(self._in_channels)):
+                if j > i:
+                    xj = self.residual_func_list[residual_func_idx](x[j])
+                    residual_func_idx += 1
+
+                    xj = upsample(xj, scale_factor=2**(j - i), mode="nearest")
+                    residual = paddle.add(x=residual, y=xj)
+                elif j < i:
+                    xj = x[j]
+                    for k in range(i - j):
+                        xj = self.residual_func_list[residual_func_idx](xj)
+                        residual_func_idx += 1
+
+                    residual = paddle.add(x=residual, y=xj)
+
+            residual = self.relu(residual)
+            out.append(residual)
+
+        return out
+
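+# FuseLayers mixes information across resolutions: for output branch i, each
+# lower-resolution input (j > i) is channel-matched by a 1x1 conv and upsampled
+# by 2**(j - i), while each higher-resolution input (j < i) is downsampled by
+# i - j stride-2 3x3 convs; the contributions are summed and passed through ReLU.
+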
+
+class LastClsOut(TheseusLayer):
+    def __init__(self,
+                 num_channel_list,
+                 has_se,
+                 num_filters_list=[32, 64, 128, 256]):
+        super().__init__()
+
+        self.func_list = nn.LayerList()
+        for idx in range(len(num_channel_list)):
+            self.func_list.append(
+                BottleneckBlock(
+                    num_channels=num_channel_list[idx],
+                    num_filters=num_filters_list[idx],
+                    has_se=has_se,
+                    downsample=True))
+
+    def forward(self, x, res_dict=None):
+        out = []
+        for idx, xi in enumerate(x):
+            xi = self.func_list[idx](xi)
+            out.append(xi)
+        return out
+
+
+class HRNet(TheseusLayer):
+    """
+    HRNet
+    Args:
+        width: int=18. Base channel number of HRNet.
+        has_se: bool=False. If 'True', add se module to HRNet.
+        class_num: int=1000. Output num of last fc layer.
+    Returns:
+        model: nn.Layer. Specific HRNet model depends on args.
+    """
+
+    def __init__(self,
+                 stages_pattern,
+                 width=18,
+                 has_se=False,
+                 class_num=1000,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+
+        self.width = width
+        self.has_se = has_se
+        self._class_num = class_num
+
+        channels_2 = [self.width, self.width * 2]
+        channels_3 = [self.width, self.width * 2, self.width * 4]
+        channels_4 = [
+            self.width, self.width * 2, self.width * 4, self.width * 8
+        ]
+
+        self.conv_layer1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act="relu")
+
+        self.conv_layer1_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act="relu")
+
+        self.layer1 = nn.Sequential(* [
+            BottleneckBlock(
+                num_channels=64 if i == 0 else 256,
+                num_filters=64,
+                has_se=has_se,
+                stride=1,
+                downsample=True if i == 0 else False) for i in range(4)
+        ])
+
+        self.conv_tr1_1 = ConvBNLayer(
+            num_channels=256, num_filters=width, filter_size=3)
+        self.conv_tr1_2 = ConvBNLayer(
+            num_channels=256, num_filters=width * 2, filter_size=3, stride=2)
+
+        self.st2 = Stage(
+            num_modules=1, num_filters=channels_2, has_se=self.has_se)
+
+        self.conv_tr2 = ConvBNLayer(
+            num_channels=width * 2,
+            num_filters=width * 4,
+            filter_size=3,
+            stride=2)
+        self.st3 = Stage(
+            num_modules=4, num_filters=channels_3, has_se=self.has_se)
+
+        self.conv_tr3 = ConvBNLayer(
+            num_channels=width * 4,
+            num_filters=width * 8,
+            filter_size=3,
+            stride=2)
+
+        self.st4 = Stage(
+            num_modules=3, num_filters=channels_4, has_se=self.has_se)
+
+        # classification
+        num_filters_list = [32, 64, 128, 256]
+        self.last_cls = LastClsOut(
+            num_channel_list=channels_4,
+            has_se=self.has_se,
+            num_filters_list=num_filters_list)
+
+        last_num_filters = [256, 512, 1024]
+        self.cls_head_conv_list = nn.LayerList()
+        for idx in range(3):
+            self.cls_head_conv_list.append(
+                ConvBNLayer(
+                    num_channels=num_filters_list[idx] * 4,
+                    num_filters=last_num_filters[idx],
+                    filter_size=3,
+                    stride=2))
+
+        self.conv_last = ConvBNLayer(
+            num_channels=1024, num_filters=2048, filter_size=1, stride=1)
+
+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
+
+        stdv = 1.0 / math.sqrt(2048 * 1.0)
+
+        self.fc = nn.Linear(
+            2048,
+            class_num,
+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.conv_layer1_1(x)
+        x = self.conv_layer1_2(x)
+
+        x = self.layer1(x)
+
+        tr1_1 = self.conv_tr1_1(x)
+        tr1_2 = self.conv_tr1_2(x)
+        x = self.st2([tr1_1, tr1_2])
+
+        tr2 = self.conv_tr2(x[-1])
+        x.append(tr2)
+        x = self.st3(x)
+
+        tr3 = self.conv_tr3(x[-1])
+        x.append(tr3)
+        x = self.st4(x)
+
+        x = self.last_cls(x)
+
+        y = x[0]
+        for idx in range(3):
+            y = paddle.add(x[idx + 1], self.cls_head_conv_list[idx](y))
+
+        y = self.conv_last(y)
+        y = self.avg_pool(y)
+        y = paddle.reshape(y, shape=[-1, y.shape[1]])
+        y = self.fc(y)
+        return y
+
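+# Usage sketch (illustrative only; assumes a working paddle installation):
+#
+#     import paddle
+#     net = HRNet(stages_pattern=MODEL_STAGES_PATTERN["HRNet"], width=18)
+#     logits = net(paddle.rand([1, 3, 224, 224]))  # shape: [1, 1000]
+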
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W18_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W18_C` model depends on args.
+    """
+    model = HRNet(
+        width=18, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld)
+    return model
+
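+# For example (illustrative; "local.pdparams" is a hypothetical path):
+#     model = HRNet_W18_C(pretrained=True)              # fetch official weights
+#     model = HRNet_W18_C(pretrained="local.pdparams")  # or load a local checkpoint
+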
+
+def HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W30_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W30_C` model depends on args.
+    """
+    model = HRNet(
+        width=30, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld)
+    return model
+
+
+def HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W32_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W32_C` model depends on args.
+    """
+    model = HRNet(
+        width=32, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld)
+    return model
+
+
+def HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W40_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W40_C` model depends on args.
+    """
+    model = HRNet(
+        width=40, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld)
+    return model
+
+
+def HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W44_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W44_C` model depends on args.
+    """
+    model = HRNet(
+        width=44, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld)
+    return model
+
+
+def HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W48_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W48_C` model depends on args.
+    """
+    model = HRNet(
+        width=48, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld)
+    return model
+
+
+def HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W60_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W60_C` model depends on args.
+    """
+    model = HRNet(
+        width=60, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W60_C"], use_ssld)
+    return model
+
+
+def HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    HRNet_W64_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `HRNet_W64_C` model depends on args.
+    """
+    model = HRNet(
+        width=64, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W18_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W18_C` model depends on args.
+    """
+    model = HRNet(
+        width=18,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W18_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W30_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W30_C` model depends on args.
+    """
+    model = HRNet(
+        width=30,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W30_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W32_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W32_C` model depends on args.
+    """
+    model = HRNet(
+        width=32,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W32_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W40_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W40_C` model depends on args.
+    """
+    model = HRNet(
+        width=40,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W40_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W44_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W44_C` model depends on args.
+    """
+    model = HRNet(
+        width=44,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W44_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W48_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W48_C` model depends on args.
+    """
+    model = HRNet(
+        width=48,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W48_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W60_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W60_C` model depends on args.
+    """
+    model = HRNet(
+        width=60,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W60_C"], use_ssld)
+    return model
+
+
+def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
+    """
+    SE_HRNet_W64_C
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `SE_HRNet_W64_C` model depends on args.
+    """
+    model = HRNet(
+        width=64,
+        stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
+        has_se=True,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld)
+    return model

+ 557 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/inception_v3.py

@@ -0,0 +1,557 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+import math
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "InceptionV3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/InceptionV3_pretrained.pdparams"
+}
+
+MODEL_STAGES_PATTERN = {
+    "InceptionV3": [
+        "inception_block_list[2]", "inception_block_list[3]",
+        "inception_block_list[7]", "inception_block_list[8]",
+        "inception_block_list[10]"
+    ]
+}
+
+__all__ = list(MODEL_URLS.keys())
+'''
+InceptionV3 config: dict.
+    keys: names of the inception block groups of InceptionV3.
+    values: channel settings of the convs in each group of blocks.
+'''
+NET_CONFIG = {
+    "inception_a": [[192, 256, 288], [32, 64, 64]],
+    "inception_b": [288],
+    "inception_c": [[768, 768, 768, 768], [128, 160, 160, 192]],
+    "inception_d": [768],
+    "inception_e": [1280, 2048]
+}
+
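+# Reading NET_CONFIG: "inception_a" builds three InceptionA blocks with input
+# channels [192, 256, 288] and pool_features [32, 64, 64]; "inception_b" and
+# "inception_d" hold the input channels of the single reduction blocks; and
+# "inception_c"/"inception_e" list the input channels (plus channels_7x7 for
+# InceptionC) of the remaining blocks.
+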
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 groups=1,
+                 act="relu"):
+        super().__init__()
+        self.act = act
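+        # NOTE: `act` works as an on/off switch here; any truthy value applies
+        # ReLU in forward, so only the "relu" activation is actually supported.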
+        self.conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            bias_attr=False)
+        self.bn = BatchNorm(num_filters)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.act:
+            x = self.relu(x)
+        return x
+
+
+class InceptionStem(TheseusLayer):
+    def __init__(self):
+        super().__init__()
+        self.conv_1a_3x3 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act="relu")
+        self.conv_2a_3x3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act="relu")
+        self.conv_2b_3x3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            padding=1,
+            act="relu")
+
+        self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+        self.conv_3b_1x1 = ConvBNLayer(
+            num_channels=64, num_filters=80, filter_size=1, act="relu")
+        self.conv_4a_3x3 = ConvBNLayer(
+            num_channels=80, num_filters=192, filter_size=3, act="relu")
+
+    def forward(self, x):
+        x = self.conv_1a_3x3(x)
+        x = self.conv_2a_3x3(x)
+        x = self.conv_2b_3x3(x)
+        x = self.max_pool(x)
+        x = self.conv_3b_1x1(x)
+        x = self.conv_4a_3x3(x)
+        x = self.max_pool(x)
+        return x
+
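+# For a canonical 299x299 input, the stem produces a [N, 192, 35, 35] feature
+# map: one stride-2 conv, two stride-2 max-pools, and two unpadded 3x3 convs.
+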
+
+class InceptionA(TheseusLayer):
+    def __init__(self, num_channels, pool_features):
+        super().__init__()
+        self.branch1x1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=64,
+            filter_size=1,
+            act="relu")
+        self.branch5x5_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=48,
+            filter_size=1,
+            act="relu")
+        self.branch5x5_2 = ConvBNLayer(
+            num_channels=48,
+            num_filters=64,
+            filter_size=5,
+            padding=2,
+            act="relu")
+
+        self.branch3x3dbl_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=64,
+            filter_size=1,
+            act="relu")
+        self.branch3x3dbl_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=96,
+            filter_size=3,
+            padding=1,
+            act="relu")
+        self.branch3x3dbl_3 = ConvBNLayer(
+            num_channels=96,
+            num_filters=96,
+            filter_size=3,
+            padding=1,
+            act="relu")
+        self.branch_pool = AvgPool2D(
+            kernel_size=3, stride=1, padding=1, exclusive=False)
+        self.branch_pool_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=pool_features,
+            filter_size=1,
+            act="relu")
+
+    def forward(self, x):
+        branch1x1 = self.branch1x1(x)
+        branch5x5 = self.branch5x5_1(x)
+        branch5x5 = self.branch5x5_2(branch5x5)
+
+        branch3x3dbl = self.branch3x3dbl_1(x)
+        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
+
+        branch_pool = self.branch_pool(x)
+        branch_pool = self.branch_pool_conv(branch_pool)
+        x = paddle.concat(
+            [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)
+        return x
+
+
+class InceptionB(TheseusLayer):
+    def __init__(self, num_channels):
+        super().__init__()
+        self.branch3x3 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=384,
+            filter_size=3,
+            stride=2,
+            act="relu")
+        self.branch3x3dbl_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=64,
+            filter_size=1,
+            act="relu")
+        self.branch3x3dbl_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=96,
+            filter_size=3,
+            padding=1,
+            act="relu")
+        self.branch3x3dbl_3 = ConvBNLayer(
+            num_channels=96,
+            num_filters=96,
+            filter_size=3,
+            stride=2,
+            act="relu")
+        self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
+
+    def forward(self, x):
+        branch3x3 = self.branch3x3(x)
+
+        branch3x3dbl = self.branch3x3dbl_1(x)
+        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
+
+        branch_pool = self.branch_pool(x)
+
+        x = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
+
+        return x
+
+
+class InceptionC(TheseusLayer):
+    def __init__(self, num_channels, channels_7x7):
+        super().__init__()
+        self.branch1x1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=192,
+            filter_size=1,
+            act="relu")
+
+        self.branch7x7_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=channels_7x7,
+            filter_size=1,
+            stride=1,
+            act="relu")
+        self.branch7x7_2 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=channels_7x7,
+            filter_size=(1, 7),
+            stride=1,
+            padding=(0, 3),
+            act="relu")
+        self.branch7x7_3 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=192,
+            filter_size=(7, 1),
+            stride=1,
+            padding=(3, 0),
+            act="relu")
+
+        self.branch7x7dbl_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=channels_7x7,
+            filter_size=1,
+            act="relu")
+        self.branch7x7dbl_2 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=channels_7x7,
+            filter_size=(7, 1),
+            padding=(3, 0),
+            act="relu")
+        self.branch7x7dbl_3 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=channels_7x7,
+            filter_size=(1, 7),
+            padding=(0, 3),
+            act="relu")
+        self.branch7x7dbl_4 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=channels_7x7,
+            filter_size=(7, 1),
+            padding=(3, 0),
+            act="relu")
+        self.branch7x7dbl_5 = ConvBNLayer(
+            num_channels=channels_7x7,
+            num_filters=192,
+            filter_size=(1, 7),
+            padding=(0, 3),
+            act="relu")
+
+        self.branch_pool = AvgPool2D(
+            kernel_size=3, stride=1, padding=1, exclusive=False)
+        self.branch_pool_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=192,
+            filter_size=1,
+            act="relu")
+
+    def forward(self, x):
+        branch1x1 = self.branch1x1(x)
+
+        branch7x7 = self.branch7x7_1(x)
+        branch7x7 = self.branch7x7_2(branch7x7)
+        branch7x7 = self.branch7x7_3(branch7x7)
+
+        branch7x7dbl = self.branch7x7dbl_1(x)
+        branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
+        branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
+        branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
+        branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
+
+        branch_pool = self.branch_pool(x)
+        branch_pool = self.branch_pool_conv(branch_pool)
+
+        x = paddle.concat(
+            [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)
+
+        return x
+
+
+class InceptionD(TheseusLayer):
+    def __init__(self, num_channels):
+        super().__init__()
+        self.branch3x3_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=192,
+            filter_size=1,
+            act="relu")
+        self.branch3x3_2 = ConvBNLayer(
+            num_channels=192,
+            num_filters=320,
+            filter_size=3,
+            stride=2,
+            act="relu")
+        self.branch7x7x3_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=192,
+            filter_size=1,
+            act="relu")
+        self.branch7x7x3_2 = ConvBNLayer(
+            num_channels=192,
+            num_filters=192,
+            filter_size=(1, 7),
+            padding=(0, 3),
+            act="relu")
+        self.branch7x7x3_3 = ConvBNLayer(
+            num_channels=192,
+            num_filters=192,
+            filter_size=(7, 1),
+            padding=(3, 0),
+            act="relu")
+        self.branch7x7x3_4 = ConvBNLayer(
+            num_channels=192,
+            num_filters=192,
+            filter_size=3,
+            stride=2,
+            act="relu")
+        self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
+
+    def forward(self, x):
+        branch3x3 = self.branch3x3_1(x)
+        branch3x3 = self.branch3x3_2(branch3x3)
+
+        branch7x7x3 = self.branch7x7x3_1(x)
+        branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
+        branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
+        branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
+
+        branch_pool = self.branch_pool(x)
+
+        x = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
+        return x
+
+
+class InceptionE(TheseusLayer):
+    def __init__(self, num_channels):
+        super().__init__()
+        self.branch1x1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=320,
+            filter_size=1,
+            act="relu")
+        self.branch3x3_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=384,
+            filter_size=1,
+            act="relu")
+        self.branch3x3_2a = ConvBNLayer(
+            num_channels=384,
+            num_filters=384,
+            filter_size=(1, 3),
+            padding=(0, 1),
+            act="relu")
+        self.branch3x3_2b = ConvBNLayer(
+            num_channels=384,
+            num_filters=384,
+            filter_size=(3, 1),
+            padding=(1, 0),
+            act="relu")
+
+        self.branch3x3dbl_1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=448,
+            filter_size=1,
+            act="relu")
+        self.branch3x3dbl_2 = ConvBNLayer(
+            num_channels=448,
+            num_filters=384,
+            filter_size=3,
+            padding=1,
+            act="relu")
+        self.branch3x3dbl_3a = ConvBNLayer(
+            num_channels=384,
+            num_filters=384,
+            filter_size=(1, 3),
+            padding=(0, 1),
+            act="relu")
+        self.branch3x3dbl_3b = ConvBNLayer(
+            num_channels=384,
+            num_filters=384,
+            filter_size=(3, 1),
+            padding=(1, 0),
+            act="relu")
+        self.branch_pool = AvgPool2D(
+            kernel_size=3, stride=1, padding=1, exclusive=False)
+        self.branch_pool_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=192,
+            filter_size=1,
+            act="relu")
+
+    def forward(self, x):
+        branch1x1 = self.branch1x1(x)
+
+        branch3x3 = self.branch3x3_1(x)
+        branch3x3 = [
+            self.branch3x3_2a(branch3x3),
+            self.branch3x3_2b(branch3x3),
+        ]
+        branch3x3 = paddle.concat(branch3x3, axis=1)
+
+        branch3x3dbl = self.branch3x3dbl_1(x)
+        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+        branch3x3dbl = [
+            self.branch3x3dbl_3a(branch3x3dbl),
+            self.branch3x3dbl_3b(branch3x3dbl),
+        ]
+        branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
+
+        branch_pool = self.branch_pool(x)
+        branch_pool = self.branch_pool_conv(branch_pool)
+
+        x = paddle.concat(
+            [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)
+        return x
+
+
+class Inception_V3(TheseusLayer):
+    """
+    Inception_V3
+    Args:
+        config: dict. config of Inception_V3.
+        class_num: int=1000. The number of classes.
+        pretrained: (True or False) or path of pretrained_model. Whether to load the pretrained model.
+    Returns:
+        model: nn.Layer. Specific Inception_V3 model depends on args.
+    """
+
+    def __init__(self,
+                 config,
+                 stages_pattern,
+                 class_num=1000,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+
+        self.inception_a_list = config["inception_a"]
+        self.inception_c_list = config["inception_c"]
+        self.inception_b_list = config["inception_b"]
+        self.inception_d_list = config["inception_d"]
+        self.inception_e_list = config["inception_e"]
+
+        self.inception_stem = InceptionStem()
+
+        self.inception_block_list = nn.LayerList()
+        for i in range(len(self.inception_a_list[0])):
+            inception_a = InceptionA(self.inception_a_list[0][i],
+                                     self.inception_a_list[1][i])
+            self.inception_block_list.append(inception_a)
+
+        for i in range(len(self.inception_b_list)):
+            inception_b = InceptionB(self.inception_b_list[i])
+            self.inception_block_list.append(inception_b)
+
+        for i in range(len(self.inception_c_list[0])):
+            inception_c = InceptionC(self.inception_c_list[0][i],
+                                     self.inception_c_list[1][i])
+            self.inception_block_list.append(inception_c)
+
+        for i in range(len(self.inception_d_list)):
+            inception_d = InceptionD(self.inception_d_list[i])
+            self.inception_block_list.append(inception_d)
+
+        for i in range(len(self.inception_e_list)):
+            inception_e = InceptionE(self.inception_e_list[i])
+            self.inception_block_list.append(inception_e)
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.dropout = Dropout(p=0.2, mode="downscale_in_infer")
+        stdv = 1.0 / math.sqrt(2048 * 1.0)
+        self.fc = Linear(
+            2048,
+            class_num,
+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr())
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.inception_stem(x)
+        for inception_block in self.inception_block_list:
+            x = inception_block(x)
+        x = self.avg_pool(x)
+        x = paddle.reshape(x, shape=[-1, 2048])
+        x = self.dropout(x)
+        x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def InceptionV3(pretrained=False, use_ssld=False, **kwargs):
+    """
+    InceptionV3
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `InceptionV3` model depends on args.
+    """
+    model = Inception_V3(
+        NET_CONFIG,
+        stages_pattern=MODEL_STAGES_PATTERN["InceptionV3"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld)
+    return model
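+
+# Example (illustrative): InceptionV3(class_num=102) builds the backbone from
+# NET_CONFIG with a 102-way classifier; inputs are conventionally 299x299,
+# though the adaptive average pooling accepts other spatial sizes.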

+ 257 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/mobilenet_v1.py

@@ -0,0 +1,257 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear, ReLU, Flatten
+from paddle.nn import AdaptiveAvgPool2D
+from paddle.nn.initializer import KaimingNormal
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "MobileNetV1_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_25_pretrained.pdparams",
+    "MobileNetV1_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_5_pretrained.pdparams",
+    "MobileNetV1_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_75_pretrained.pdparams",
+    "MobileNetV1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_pretrained.pdparams"
+}
+
+MODEL_STAGES_PATTERN = {
+    "MobileNetV1": ["blocks[0]", "blocks[2]", "blocks[4]", "blocks[10]"]
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 num_groups=1):
+        super().__init__()
+
+        self.conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(initializer=KaimingNormal()),
+            bias_attr=False)
+        self.bn = BatchNorm(num_filters)
+        self.relu = ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
+
+class DepthwiseSeparable(TheseusLayer):
+    def __init__(self, num_channels, num_filters1, num_filters2, num_groups,
+                 stride, scale):
+        super().__init__()
+
+        self.depthwise_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=int(num_filters1 * scale),
+            filter_size=3,
+            stride=stride,
+            padding=1,
+            num_groups=int(num_groups * scale))
+
+        self.pointwise_conv = ConvBNLayer(
+            num_channels=int(num_filters1 * scale),
+            filter_size=1,
+            num_filters=int(num_filters2 * scale),
+            stride=1,
+            padding=0)
+
+    def forward(self, x):
+        x = self.depthwise_conv(x)
+        x = self.pointwise_conv(x)
+        return x
+
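+# Depthwise-separable factorization: the 3x3 depthwise conv costs about C*9
+# weights and the 1x1 pointwise conv C*C', versus 9*C*C' for a dense 3x3 conv;
+# at C = C' = 512 that is roughly a 9x parameter reduction for this block.
+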
+
+class MobileNet(TheseusLayer):
+    """
+    MobileNet
+    Args:
+        scale: float=1.0. The coefficient that controls the size of network parameters. 
+        class_num: int=1000. The number of classes.
+    Returns:
+        model: nn.Layer. Specific MobileNet model depends on args.
+    """
+
+    def __init__(self,
+                 stages_pattern,
+                 scale=1.0,
+                 class_num=1000,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+        self.scale = scale
+
+        self.conv = ConvBNLayer(
+            num_channels=3,
+            filter_size=3,
+            num_filters=int(32 * scale),
+            stride=2,
+            padding=1)
+
+        # num_channels, num_filters1, num_filters2, num_groups, stride
+        self.cfg = [[int(32 * scale), 32, 64, 32, 1],
+                    [int(64 * scale), 64, 128, 64, 2],
+                    [int(128 * scale), 128, 128, 128, 1],
+                    [int(128 * scale), 128, 256, 128, 2],
+                    [int(256 * scale), 256, 256, 256, 1],
+                    [int(256 * scale), 256, 512, 256, 2],
+                    [int(512 * scale), 512, 512, 512, 1],
+                    [int(512 * scale), 512, 512, 512, 1],
+                    [int(512 * scale), 512, 512, 512, 1],
+                    [int(512 * scale), 512, 512, 512, 1],
+                    [int(512 * scale), 512, 512, 512, 1],
+                    [int(512 * scale), 512, 1024, 512, 2],
+                    [int(1024 * scale), 1024, 1024, 1024, 1]]
+
+        self.blocks = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=params[0],
+                num_filters1=params[1],
+                num_filters2=params[2],
+                num_groups=params[3],
+                stride=params[4],
+                scale=scale) for params in self.cfg
+        ])
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.flatten = Flatten(start_axis=1, stop_axis=-1)
+
+        self.fc = Linear(
+            int(1024 * scale),
+            class_num,
+            weight_attr=ParamAttr(initializer=KaimingNormal()))
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.blocks(x)
+        x = self.avg_pool(x)
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV1_x0_25
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV1_x0_25` model depends on args.
+    """
+    model = MobileNet(
+        scale=0.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV1_x0_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV1_x0_5` model depends on args.
+    """
+    model = MobileNet(
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV1_x0_75
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV1_x0_75` model depends on args.
+    """
+    model = MobileNet(
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV1(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV1
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV1` model depends on args.
+    """
+    model = MobileNet(
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld)
+    return model
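+
+# Example (illustrative): MobileNetV1_x0_5(pretrained=False) halves every
+# channel count via scale=0.5, shrinking the final fc input from 1024 to 512.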

+ 586 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/mobilenet_v3.py

@@ -0,0 +1,586 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
+from paddle.regularizer import L2Decay
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "MobileNetV3_small_x0_35":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_35_pretrained.pdparams",
+    "MobileNetV3_small_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_5_pretrained.pdparams",
+    "MobileNetV3_small_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_75_pretrained.pdparams",
+    "MobileNetV3_small_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_0_pretrained.pdparams",
+    "MobileNetV3_small_x1_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_25_pretrained.pdparams",
+    "MobileNetV3_large_x0_35":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_35_pretrained.pdparams",
+    "MobileNetV3_large_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_5_pretrained.pdparams",
+    "MobileNetV3_large_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_75_pretrained.pdparams",
+    "MobileNetV3_large_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_0_pretrained.pdparams",
+    "MobileNetV3_large_x1_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_25_pretrained.pdparams",
+}
+
+MODEL_STAGES_PATTERN = {
+    "MobileNetV3_small":
+    ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
+    "MobileNetV3_large":
+    ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
+# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
+# k: kernel_size
+# exp: middle channel number in depthwise block
+# c: output channel number in depthwise block
+# se: whether to use SE block
+# act: which activation to use
+# s: stride in depthwise block
+NET_CONFIG = {
+    "large": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, False, "relu", 1],
+        [3, 64, 24, False, "relu", 2],
+        [3, 72, 24, False, "relu", 1],
+        [5, 72, 40, True, "relu", 2],
+        [5, 120, 40, True, "relu", 1],
+        [5, 120, 40, True, "relu", 1],
+        [3, 240, 80, False, "hardswish", 2],
+        [3, 200, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 184, 80, False, "hardswish", 1],
+        [3, 480, 112, True, "hardswish", 1],
+        [3, 672, 112, True, "hardswish", 1],
+        [5, 672, 160, True, "hardswish", 2],
+        [5, 960, 160, True, "hardswish", 1],
+        [5, 960, 160, True, "hardswish", 1],
+    ],
+    "small": [
+        # k, exp, c, se, act, s
+        [3, 16, 16, True, "relu", 2],
+        [3, 72, 24, False, "relu", 2],
+        [3, 88, 24, False, "relu", 1],
+        [5, 96, 40, True, "hardswish", 2],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 240, 40, True, "hardswish", 1],
+        [5, 120, 48, True, "hardswish", 1],
+        [5, 144, 48, True, "hardswish", 1],
+        [5, 288, 96, True, "hardswish", 2],
+        [5, 576, 96, True, "hardswish", 1],
+        [5, 576, 96, True, "hardswish", 1],
+    ]
+}
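+# Reading one row: [5, 72, 40, True, "relu", 2] is a block with a 5x5 depthwise
+# kernel, 72 expansion channels, 40 output channels, an SE module, ReLU
+# activation, and stride 2.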
+# first conv output channel number in MobileNetV3
+STEM_CONV_NUMBER = 16
+# last second conv output channel for "small"
+LAST_SECOND_CONV_SMALL = 576
+# last second conv output channel for "large"
+LAST_SECOND_CONV_LARGE = 960
+# last conv output channel number for "large" and "small"
+LAST_CONV = 1280
+
+
+def _make_divisible(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
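+# e.g. _make_divisible(37) -> 40 and _make_divisible(7) -> 8; the 0.9 * v guard
+# ensures rounding down never removes more than ~10% of the requested channels.
+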
+
+def _create_act(act):
+    if act == "hardswish":
+        return nn.Hardswish()
+    elif act == "relu":
+        return nn.ReLU()
+    elif act is None:
+        return None
+    else:
+        raise RuntimeError(
+            "The activation function is not supported: {}".format(act))
+
+
+class MobileNetV3(TheseusLayer):
+    """
+    MobileNetV3
+    Args:
+        config: list. MobileNetV3 depthwise blocks config.
+        stages_pattern: list. Layer-name patterns that mark the stages of the network.
+        scale: float=1.0. The coefficient that controls the size of network parameters.
+        class_num: int=1000. The number of classes.
+        inplanes: int=16. The output channel number of the first convolution layer.
+        class_squeeze: int=960. The output channel number of the penultimate convolution layer.
+        class_expand: int=1280. The output channel number of the last convolution layer.
+        dropout_prob: float=0.2. Probability of setting units to zero.
+    Returns:
+        model: nn.Layer. Specific MobileNetV3 model depends on args.
+    """
+
+    def __init__(self,
+                 config,
+                 stages_pattern,
+                 scale=1.0,
+                 class_num=1000,
+                 inplanes=STEM_CONV_NUMBER,
+                 class_squeeze=LAST_SECOND_CONV_LARGE,
+                 class_expand=LAST_CONV,
+                 dropout_prob=0.2,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+
+        self.cfg = config
+        self.scale = scale
+        self.inplanes = inplanes
+        self.class_squeeze = class_squeeze
+        self.class_expand = class_expand
+        self.class_num = class_num
+
+        self.conv = ConvBNLayer(
+            in_c=3,
+            out_c=_make_divisible(self.inplanes * self.scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act="hardswish")
+
+        self.blocks = nn.Sequential(* [
+            ResidualUnit(
+                in_c=_make_divisible(self.inplanes * self.scale if i == 0 else
+                                     self.cfg[i - 1][2] * self.scale),
+                mid_c=_make_divisible(self.scale * exp),
+                out_c=_make_divisible(self.scale * c),
+                filter_size=k,
+                stride=s,
+                use_se=se,
+                act=act) for i, (k, exp, c, se, act, s) in enumerate(self.cfg)
+        ])
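+
+        # Editor's comment: the first ResidualUnit consumes the stem output
+        # (`inplanes * scale`); every later unit consumes the previous config
+        # row's out_c (`cfg[i - 1][2] * scale`), so the rows chain together
+        # without any explicit channel bookkeeping.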
+
+        self.last_second_conv = ConvBNLayer(
+            in_c=_make_divisible(self.cfg[-1][2] * self.scale),
+            out_c=_make_divisible(self.scale * self.class_squeeze),
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act="hardswish")
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+
+        self.last_conv = Conv2D(
+            in_channels=_make_divisible(self.scale * self.class_squeeze),
+            out_channels=self.class_expand,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias_attr=False)
+
+        self.hardswish = nn.Hardswish()
+        if dropout_prob is not None:
+            self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
+        else:
+            self.dropout = None
+        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
+
+        self.fc = Linear(self.class_expand, class_num)
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.blocks(x)
+        x = self.last_second_conv(x)
+        x = self.avg_pool(x)
+        x = self.last_conv(x)
+        x = self.hardswish(x)
+        if self.dropout is not None:
+            x = self.dropout(x)
+        x = self.flatten(x)
+        x = self.fc(x)
+
+        return x
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 in_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 padding,
+                 num_groups=1,
+                 if_act=True,
+                 act=None):
+        super().__init__()
+
+        self.conv = Conv2D(
+            in_channels=in_c,
+            out_channels=out_c,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            bias_attr=False)
+        self.bn = BatchNorm(
+            num_channels=out_c,
+            act=None,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.if_act = if_act
+        self.act = _create_act(act)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.if_act:
+            x = self.act(x)
+        return x
+
+
+class ResidualUnit(TheseusLayer):
+    def __init__(self,
+                 in_c,
+                 mid_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 use_se,
+                 act=None):
+        super().__init__()
+        self.if_shortcut = stride == 1 and in_c == out_c
+        self.if_se = use_se
+
+        self.expand_conv = ConvBNLayer(
+            in_c=in_c,
+            out_c=mid_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=True,
+            act=act)
+        self.bottleneck_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=mid_c,
+            filter_size=filter_size,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            num_groups=mid_c,
+            if_act=True,
+            act=act)
+        if self.if_se:
+            self.mid_se = SEModule(mid_c)
+        self.linear_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=out_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None)
+
+    def forward(self, x):
+        identity = x
+        x = self.expand_conv(x)
+        x = self.bottleneck_conv(x)
+        if self.if_se:
+            x = self.mid_se(x)
+        x = self.linear_conv(x)
+        if self.if_shortcut:
+            x = paddle.add(identity, x)
+        return x
+
+
+# nn.Hardsigmoid does not expose the "slope" and "offset" arguments supported
+# by nn.functional.hardsigmoid, so wrap the functional form instead.
+class Hardsigmoid(TheseusLayer):
+    def __init__(self, slope=0.2, offset=0.5):
+        super().__init__()
+        self.slope = slope
+        self.offset = offset
+
+    def forward(self, x):
+        return nn.functional.hardsigmoid(
+            x, slope=self.slope, offset=self.offset)
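+
+# Editor's note: with the defaults above this computes clip(0.2 * x + 0.5, 0, 1),
+# the piecewise-linear sigmoid approximation used by the SE gate below.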
+
+
+class SEModule(TheseusLayer):
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
+            in_channels=channel,
+            out_channels=channel // reduction,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.relu = nn.ReLU()
+        self.conv2 = Conv2D(
+            in_channels=channel // reduction,
+            out_channels=channel,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
+
+    def forward(self, x):
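+        # Editor's comment: squeeze-and-excitation -- global average pooling
+        # squeezes H x W down to 1 x 1, the two 1x1 convs implement the
+        # reduce-then-expand bottleneck (channel // reduction), and the
+        # hard-sigmoid gate rescales the original feature map channel-wise.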
+        identity = x
+        x = self.avg_pool(x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.hardsigmoid(x)
+        return paddle.multiply(x=identity, y=x)
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
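+
+# Editor's sketch (assumed usage, mirroring the accepted `pretrained` types):
+#
+#     model = MobileNetV3_small_x1_0(pretrained=True)             # official weights
+#     model = MobileNetV3_small_x1_0(pretrained="local.pdparams") # local weights
+#     model = MobileNetV3_small_x1_0(pretrained=False)            # random init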
+
+
+def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_small_x0_35
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_small_x0_35` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_SMALL,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_small_x0_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_small_x0_5` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_SMALL,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_small_x0_75
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_small_x0_75` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_SMALL,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_small_x1_0
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_small_x1_0` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_SMALL,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_small_x1_25
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_small_x1_25` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["small"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_SMALL,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_large_x0_35
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_large_x0_35` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.35,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
+        class_squeeze=LAST_SECOND_CONV_LARGE,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_large_x0_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_large_x0_5` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.5,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        class_squeeze=LAST_SECOND_CONV_LARGE,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_large_x0_75
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_large_x0_75` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=0.75,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        class_squeeze=LAST_SECOND_CONV_LARGE,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_large_x1_0
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_large_x1_0` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.0,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        class_squeeze=LAST_SECOND_CONV_LARGE,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"],
+                     use_ssld)
+    return model
+
+
+def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MobileNetV3_large_x1_25
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `MobileNetV3_large_x1_25` model depends on args.
+    """
+    model = MobileNetV3(
+        config=NET_CONFIG["large"],
+        scale=1.25,
+        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
+        class_squeeze=LAST_SECOND_CONV_LARGE,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"],
+                     use_ssld)
+    return model
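+
+# Editor's usage sketch (hedged): every factory above forwards **kwargs to
+# MobileNetV3, so e.g. a 10-class head can be requested directly:
+#
+#     model = MobileNetV3_large_x1_0(pretrained=False, class_num=10)
+#     logits = model(paddle.rand([1, 3, 224, 224]))   # shape: [1, 10]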

+ 419 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/pp_lcnet.py

@@ -0,0 +1,419 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import paddle
+import paddle.nn as nn
+from paddle import ParamAttr
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import KaimingNormal
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "PPLCNet_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams",
+    "PPLCNet_x0_35":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams",
+    "PPLCNet_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams",
+    "PPLCNet_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams",
+    "PPLCNet_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams",
+    "PPLCNet_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams",
+    "PPLCNet_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams",
+    "PPLCNet_x2_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams"
+}
+
+MODEL_STAGES_PATTERN = {
+    "PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+# Each element (a list) represents a depthwise block composed of k, in_c, out_c, s, use_se.
+# k: kernel_size
+# in_c: input channel number in depthwise block
+# out_c: output channel number in depthwise block
+# s: stride in depthwise block
+# use_se: whether to use SE block
+
+NET_CONFIG = {
+    "blocks2":
+    #k, in_c, out_c, s, use_se
+    [[3, 16, 32, 1, False]],
+    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
+    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
+    "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
+                [5, 256, 256, 1, False], [5, 256, 256, 1, False],
+                [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
+    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
+}
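+
+# Editor's note: widths are multiplied by `scale` and re-rounded, so at
+# scale=0.5 the final stage emits make_divisible(512 * 0.5) == 256 channels,
+# which becomes the in_channels of the 1x1 `last_conv` defined further below.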
+
+
+def make_divisible(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 num_groups=1):
+        super().__init__()
+
+        self.conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=num_groups,
+            weight_attr=ParamAttr(initializer=KaimingNormal()),
+            bias_attr=False)
+
+        self.bn = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.hardswish = nn.Hardswish()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.hardswish(x)
+        return x
+
+
+class DepthwiseSeparable(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 dw_size=3,
+                 use_se=False):
+        super().__init__()
+        self.use_se = use_se
+        self.dw_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_channels,
+            filter_size=dw_size,
+            stride=stride,
+            num_groups=num_channels)
+        if use_se:
+            self.se = SEModule(num_channels)
+        self.pw_conv = ConvBNLayer(
+            num_channels=num_channels,
+            filter_size=1,
+            num_filters=num_filters,
+            stride=1)
+
+    def forward(self, x):
+        x = self.dw_conv(x)
+        if self.use_se:
+            x = self.se(x)
+        x = self.pw_conv(x)
+        return x
+
+
+class SEModule(TheseusLayer):
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
+            in_channels=channel,
+            out_channels=channel // reduction,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.relu = nn.ReLU()
+        self.conv2 = Conv2D(
+            in_channels=channel // reduction,
+            out_channels=channel,
+            kernel_size=1,
+            stride=1,
+            padding=0)
+        self.hardsigmoid = nn.Hardsigmoid()
+
+    def forward(self, x):
+        identity = x
+        x = self.avg_pool(x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.hardsigmoid(x)
+        x = paddle.multiply(x=identity, y=x)
+        return x
+
+
+class PPLCNet(TheseusLayer):
+    def __init__(self,
+                 stages_pattern,
+                 scale=1.0,
+                 class_num=1000,
+                 dropout_prob=0.2,
+                 class_expand=1280,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+        self.scale = scale
+        self.class_expand = class_expand
+
+        self.conv1 = ConvBNLayer(
+            num_channels=3,
+            filter_size=3,
+            num_filters=make_divisible(16 * scale),
+            stride=2)
+
+        self.blocks2 = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=make_divisible(in_c * scale),
+                num_filters=make_divisible(out_c * scale),
+                dw_size=k,
+                stride=s,
+                use_se=se)
+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
+        ])
+
+        self.blocks3 = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=make_divisible(in_c * scale),
+                num_filters=make_divisible(out_c * scale),
+                dw_size=k,
+                stride=s,
+                use_se=se)
+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
+        ])
+
+        self.blocks4 = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=make_divisible(in_c * scale),
+                num_filters=make_divisible(out_c * scale),
+                dw_size=k,
+                stride=s,
+                use_se=se)
+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
+        ])
+
+        self.blocks5 = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=make_divisible(in_c * scale),
+                num_filters=make_divisible(out_c * scale),
+                dw_size=k,
+                stride=s,
+                use_se=se)
+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
+        ])
+
+        self.blocks6 = nn.Sequential(* [
+            DepthwiseSeparable(
+                num_channels=make_divisible(in_c * scale),
+                num_filters=make_divisible(out_c * scale),
+                dw_size=k,
+                stride=s,
+                use_se=se)
+            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
+        ])
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+
+        self.last_conv = Conv2D(
+            in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
+            out_channels=self.class_expand,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias_attr=False)
+
+        self.hardswish = nn.Hardswish()
+        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
+        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
+
+        self.fc = Linear(self.class_expand, class_num)
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        x = self.conv1(x)
+
+        x = self.blocks2(x)
+        x = self.blocks3(x)
+        x = self.blocks4(x)
+        x = self.blocks5(x)
+        x = self.blocks6(x)
+
+        x = self.avg_pool(x)
+        x = self.last_conv(x)
+        x = self.hardswish(x)
+        x = self.dropout(x)
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def PPLCNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x0_25
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x0_25` model depends on args.
+    """
+    model = PPLCNet(
+        scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_25"], use_ssld)
+    return model
+
+
+def PPLCNet_x0_35(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x0_35
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x0_35` model depends on args.
+    """
+    model = PPLCNet(
+        scale=0.35, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_35"], use_ssld)
+    return model
+
+
+def PPLCNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x0_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x0_5` model depends on args.
+    """
+    model = PPLCNet(
+        scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_5"], use_ssld)
+    return model
+
+
+def PPLCNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x0_75
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x0_75` model depends on args.
+    """
+    model = PPLCNet(
+        scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_75"], use_ssld)
+    return model
+
+
+def PPLCNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x1_0
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x1_0` model depends on args.
+    """
+    model = PPLCNet(
+        scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_0"], use_ssld)
+    return model
+
+
+def PPLCNet_x1_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x1_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x1_5` model depends on args.
+    """
+    model = PPLCNet(
+        scale=1.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_5"], use_ssld)
+    return model
+
+
+def PPLCNet_x2_0(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x2_0
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x2_0` model depends on args.
+    """
+    model = PPLCNet(
+        scale=2.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_0"], use_ssld)
+    return model
+
+
+def PPLCNet_x2_5(pretrained=False, use_ssld=False, **kwargs):
+    """
+    PPLCNet_x2_5
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `PPLCNet_x2_5` model depends on args.
+    """
+    model = PPLCNet(
+        scale=2.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_5"], use_ssld)
+    return model
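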

+ 591 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/resnet.py

@@ -0,0 +1,591 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNet18":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_pretrained.pdparams",
+    "ResNet18_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_vd_pretrained.pdparams",
+    "ResNet34":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_pretrained.pdparams",
+    "ResNet34_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_pretrained.pdparams",
+    "ResNet50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_pretrained.pdparams",
+    "ResNet50_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_pretrained.pdparams",
+    "ResNet101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_pretrained.pdparams",
+    "ResNet101_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_pretrained.pdparams",
+    "ResNet152":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_pretrained.pdparams",
+    "ResNet152_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_vd_pretrained.pdparams",
+    "ResNet200_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet200_vd_pretrained.pdparams",
+}
+
+MODEL_STAGES_PATTERN = {
+    "ResNet18": ["blocks[1]", "blocks[3]", "blocks[5]", "blocks[7]"],
+    "ResNet34": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
+    "ResNet50": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
+    "ResNet101": ["blocks[2]", "blocks[6]", "blocks[29]", "blocks[32]"],
+    "ResNet152": ["blocks[2]", "blocks[10]", "blocks[46]", "blocks[49]"],
+    "ResNet200": ["blocks[2]", "blocks[14]", "blocks[62]", "blocks[65]"]
+}
+
+__all__ = list(MODEL_URLS.keys())
+'''
+ResNet config: dict.
+    key: depth of ResNet.
+    values: config dict of the specific model.
+        keys:
+            block_type: One of the two ResNet block types, BasicBlock or BottleneckBlock.
+            block_depth: The number of blocks in each of the four stages.
+            num_channels: The number of input channels of each stage.
+'''
+NET_CONFIG = {
+    "18": {
+        "block_type": "BasicBlock",
+        "block_depth": [2, 2, 2, 2],
+        "num_channels": [64, 64, 128, 256]
+    },
+    "34": {
+        "block_type": "BasicBlock",
+        "block_depth": [3, 4, 6, 3],
+        "num_channels": [64, 64, 128, 256]
+    },
+    "50": {
+        "block_type": "BottleneckBlock",
+        "block_depth": [3, 4, 6, 3],
+        "num_channels": [64, 256, 512, 1024]
+    },
+    "101": {
+        "block_type": "BottleneckBlock",
+        "block_depth": [3, 4, 23, 3],
+        "num_channels": [64, 256, 512, 1024]
+    },
+    "152": {
+        "block_type": "BottleneckBlock",
+        "block_depth": [3, 8, 36, 3],
+        "num_channels": [64, 256, 512, 1024]
+    },
+    "200": {
+        "block_type": "BottleneckBlock",
+        "block_depth": [3, 12, 48, 3],
+        "num_channels": [64, 256, 512, 1024]
+    },
+}
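+
+# Editor's sketch: the MODEL_STAGES_PATTERN entries above index the last block
+# of each stage, i.e. the running totals of "block_depth" minus one:
+#
+#     >>> from itertools import accumulate
+#     >>> [f"blocks[{n - 1}]" for n in accumulate(NET_CONFIG["50"]["block_depth"])]
+#     ['blocks[2]', 'blocks[6]', 'blocks[12]', 'blocks[15]']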
+
+
+class ConvBNLayer(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None,
+                 lr_mult=1.0,
+                 data_format="NCHW"):
+        super().__init__()
+        self.is_vd_mode = is_vd_mode
+        self.act = act
+        self.avg_pool = AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self.conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(learning_rate=lr_mult),
+            bias_attr=False,
+            data_format=data_format)
+        self.bn = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(learning_rate=lr_mult),
+            bias_attr=ParamAttr(learning_rate=lr_mult),
+            data_layout=data_format)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
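+        # Editor's comment: "vd" mode (the ResNet-D trick) average-pools before
+        # the 1x1 shortcut conv instead of striding it, so downsampling does
+        # not simply skip over activations.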
+        if self.is_vd_mode:
+            x = self.avg_pool(x)
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.act:
+            x = self.relu(x)
+        return x
+
+
+class BottleneckBlock(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 lr_mult=1.0,
+                 data_format="NCHW"):
+        super().__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act="relu",
+            lr_mult=lr_mult,
+            data_format=data_format)
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act="relu",
+            lr_mult=lr_mult,
+            data_format=data_format)
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            lr_mult=lr_mult,
+            data_format=data_format)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=stride if if_first else 1,
+                is_vd_mode=False if if_first else True,
+                lr_mult=lr_mult,
+                data_format=data_format)
+        self.relu = nn.ReLU()
+        self.shortcut = shortcut
+
+    def forward(self, x):
+        identity = x
+        x = self.conv0(x)
+        x = self.conv1(x)
+        x = self.conv2(x)
+
+        if self.shortcut:
+            short = identity
+        else:
+            short = self.short(identity)
+        x = paddle.add(x=x, y=short)
+        x = self.relu(x)
+        return x
+
+
+class BasicBlock(TheseusLayer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 lr_mult=1.0,
+                 data_format="NCHW"):
+        super().__init__()
+
+        self.stride = stride
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act="relu",
+            lr_mult=lr_mult,
+            data_format=data_format)
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            lr_mult=lr_mult,
+            data_format=data_format)
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=stride if if_first else 1,
+                is_vd_mode=False if if_first else True,
+                lr_mult=lr_mult,
+                data_format=data_format)
+        self.shortcut = shortcut
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        identity = x
+        x = self.conv0(x)
+        x = self.conv1(x)
+        if self.shortcut:
+            short = identity
+        else:
+            short = self.short(identity)
+        x = paddle.add(x=x, y=short)
+        x = self.relu(x)
+        return x
+
+
+class ResNet(TheseusLayer):
+    """
+    ResNet
+    Args:
+        config: dict. Config of ResNet.
+        stages_pattern: list. Layer-name patterns identifying the network stages (used by `init_res`).
+        version: str="vb". ResNet variant; the "vd" variant generally performs better.
+        class_num: int=1000. The number of classes.
+        lr_mult_list: list. Controls the learning rate of different stages.
+    Returns:
+        model: nn.Layer. Specific ResNet model depends on args.
+    """
+
+    def __init__(self,
+                 config,
+                 stages_pattern,
+                 version="vb",
+                 class_num=1000,
+                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+                 data_format="NCHW",
+                 input_image_channel=3,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+
+        self.cfg = config
+        self.lr_mult_list = lr_mult_list
+        self.is_vd_mode = version == "vd"
+        self.class_num = class_num
+        self.num_filters = [64, 128, 256, 512]
+        self.block_depth = self.cfg["block_depth"]
+        self.block_type = self.cfg["block_type"]
+        self.num_channels = self.cfg["num_channels"]
+        self.channels_mult = 1 if self.num_channels[-1] == 256 else 4
+
+        assert isinstance(self.lr_mult_list, (
+            list, tuple
+        )), "lr_mult_list should be in (list, tuple) but got {}".format(
+            type(self.lr_mult_list))
+        assert len(self.lr_mult_list
+                   ) == 5, "lr_mult_list length should be 5 but got {}".format(
+                       len(self.lr_mult_list))
+
+        self.stem_cfg = {
+            #num_channels, num_filters, filter_size, stride
+            "vb": [[input_image_channel, 64, 7, 2]],
+            "vd":
+            [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
+        }
+
+        self.stem = nn.Sequential(* [
+            ConvBNLayer(
+                num_channels=in_c,
+                num_filters=out_c,
+                filter_size=k,
+                stride=s,
+                act="relu",
+                lr_mult=self.lr_mult_list[0],
+                data_format=data_format)
+            for in_c, out_c, k, s in self.stem_cfg[version]
+        ])
+
+        self.max_pool = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=data_format)
+        block_list = []
+        for block_idx in range(len(self.block_depth)):
+            shortcut = False
+            for i in range(self.block_depth[block_idx]):
+                block_list.append(globals()[self.block_type](
+                    num_channels=self.num_channels[block_idx] if i == 0 else
+                    self.num_filters[block_idx] * self.channels_mult,
+                    num_filters=self.num_filters[block_idx],
+                    stride=2 if i == 0 and block_idx != 0 else 1,
+                    shortcut=shortcut,
+                    if_first=block_idx == i == 0 if version == "vd" else True,
+                    lr_mult=self.lr_mult_list[block_idx + 1],
+                    data_format=data_format))
+                shortcut = True
+        self.blocks = nn.Sequential(*block_list)
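+
+        # Editor's comment: per stage, only the first block needs a projection
+        # shortcut (shortcut=False) and, outside the first stage, stride 2.
+        # For version "vd", if_first is True only for the very first block, so
+        # all later projection shortcuts take the avg-pool (is_vd_mode) path.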
+
+        self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format)
+        self.flatten = nn.Flatten()
+        self.avg_pool_channels = self.num_channels[-1] * 2
+        stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0)
+        self.fc = Linear(
+            self.avg_pool_channels,
+            self.class_num,
+            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+
+        self.data_format = data_format
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, x):
+        with paddle.static.amp.fp16_guard():
+            if self.data_format == "NHWC":
+                x = paddle.transpose(x, [0, 2, 3, 1])
+                x.stop_gradient = True
+            x = self.stem(x)
+            x = self.max_pool(x)
+            x = self.blocks(x)
+            x = self.avg_pool(x)
+            x = self.flatten(x)
+            x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def ResNet18(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet18
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet18` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["18"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
+        version="vb",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet18"], use_ssld)
+    return model
+
+
+def ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet18_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet18_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["18"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet18_vd"], use_ssld)
+    return model
+
+
+def ResNet34(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet34
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet34` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["34"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
+        version="vb",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet34"], use_ssld)
+    return model
+
+
+def ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet34_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet34_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["34"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet34_vd"], use_ssld)
+    return model
+
+
+def ResNet50(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet50
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet50` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["50"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
+        version="vb",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld)
+    return model
+
+
+def ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet50_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet50_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["50"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vd"], use_ssld)
+    return model
+
+
+def ResNet101(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet101
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet101` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["101"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
+        version="vb",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet101"], use_ssld)
+    return model
+
+
+def ResNet101_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet101_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet101_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["101"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet101_vd"], use_ssld)
+    return model
+
+
+def ResNet152(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet152
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet152` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["152"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
+        version="vb",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet152"], use_ssld)
+    return model
+
+
+def ResNet152_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet152_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet152_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["152"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet152_vd"], use_ssld)
+    return model
+
+
+def ResNet200_vd(pretrained=False, use_ssld=False, **kwargs):
+    """
+    ResNet200_vd
+    Args:
+        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
+                    If str, means the path of the pretrained model.
+        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. Specific `ResNet200_vd` model depends on args.
+    """
+    model = ResNet(
+        config=NET_CONFIG["200"],
+        stages_pattern=MODEL_STAGES_PATTERN["ResNet200"],
+        version="vd",
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet200_vd"], use_ssld)
+    return model

+ 259 - 0
paddlers/models/ppcls/arch/backbone/legendary_models/vgg.py

@@ -0,0 +1,259 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import MaxPool2D
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "VGG11":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG11_pretrained.pdparams",
+    "VGG13":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG13_pretrained.pdparams",
+    "VGG16":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams",
+    "VGG19":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams",
+}
+
+MODEL_STAGES_PATTERN = {
+    "VGG": [
+        "conv_block_1", "conv_block_2", "conv_block_3", "conv_block_4",
+        "conv_block_5"
+    ]
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+# VGG config
+# key: VGG network depth
+# value: conv num in different blocks
+NET_CONFIG = {
+    11: [1, 1, 2, 2, 2],
+    13: [2, 2, 2, 2, 2],
+    16: [2, 2, 3, 3, 3],
+    19: [2, 2, 4, 4, 4]
+}
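+
+# Editor's sketch: each key is the network's weighted-layer count -- the conv
+# layers listed per block plus the three fully-connected layers in the head:
+#
+#     >>> all(sum(convs) + 3 == depth for depth, convs in NET_CONFIG.items())
+#     True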
+
+
+class ConvBlock(TheseusLayer):
+    def __init__(self, input_channels, output_channels, groups):
+        super().__init__()
+
+        self.groups = groups
+        self.conv1 = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias_attr=False)
+        if groups == 2 or groups == 3 or groups == 4:
+            self.conv2 = Conv2D(
+                in_channels=output_channels,
+                out_channels=output_channels,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                bias_attr=False)
+        if groups == 3 or groups == 4:
+            self.conv3 = Conv2D(
+                in_channels=output_channels,
+                out_channels=output_channels,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                bias_attr=False)
+        if groups == 4:
+            self.conv4 = Conv2D(
+                in_channels=output_channels,
+                out_channels=output_channels,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                bias_attr=False)
+
+        self.max_pool = MaxPool2D(kernel_size=2, stride=2, padding=0)
+        self.relu = nn.ReLU()
+
+    def forward(self, inputs):
+        x = self.conv1(inputs)
+        x = self.relu(x)
+        if self.groups == 2 or self.groups == 3 or self.groups == 4:
+            x = self.conv2(x)
+            x = self.relu(x)
+        if self.groups == 3 or self.groups == 4:
+            x = self.conv3(x)
+            x = self.relu(x)
+        if self.groups == 4:
+            x = self.conv4(x)
+            x = self.relu(x)
+        x = self.max_pool(x)
+        return x
+
+
+class VGGNet(TheseusLayer):
+    """
+    VGGNet
+    Args:
+        config: list. VGGNet config.
+        stages_pattern: list. Layer-name patterns identifying the network stages (used by `init_res`).
+        stop_grad_layers: int=0. The parameters of the first `stop_grad_layers` conv blocks are frozen (`param.trainable = False`).
+        class_num: int=1000. The number of classes.
+    Returns:
+        model: nn.Layer. Specific VGG model depends on args.
+    """
+
+    def __init__(self,
+                 config,
+                 stages_pattern,
+                 stop_grad_layers=0,
+                 class_num=1000,
+                 return_patterns=None,
+                 return_stages=None):
+        super().__init__()
+
+        self.stop_grad_layers = stop_grad_layers
+
+        self.conv_block_1 = ConvBlock(3, 64, config[0])
+        self.conv_block_2 = ConvBlock(64, 128, config[1])
+        self.conv_block_3 = ConvBlock(128, 256, config[2])
+        self.conv_block_4 = ConvBlock(256, 512, config[3])
+        self.conv_block_5 = ConvBlock(512, 512, config[4])
+
+        self.relu = nn.ReLU()
+        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
+
+        for idx, block in enumerate([
+                self.conv_block_1, self.conv_block_2, self.conv_block_3,
+                self.conv_block_4, self.conv_block_5
+        ]):
+            if self.stop_grad_layers >= idx + 1:
+                for param in block.parameters():
+                    param.trainable = False
+
+        self.drop = Dropout(p=0.5, mode="downscale_in_infer")
+        self.fc1 = Linear(7 * 7 * 512, 4096)
+        self.fc2 = Linear(4096, 4096)
+        self.fc3 = Linear(4096, class_num)
+
+        super().init_res(
+            stages_pattern,
+            return_patterns=return_patterns,
+            return_stages=return_stages)
+
+    def forward(self, inputs):
+        x = self.conv_block_1(inputs)
+        x = self.conv_block_2(x)
+        x = self.conv_block_3(x)
+        x = self.conv_block_4(x)
+        x = self.conv_block_5(x)
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.relu(x)
+        x = self.drop(x)
+        x = self.fc3(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def VGG11(pretrained=False, use_ssld=False, **kwargs):
+    """
+    VGG11
+    Args:
+        pretrained: bool=False or str. If True, load the pretrained parameters; if False, train from scratch.
+                    If str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. A specific `VGG11` model depending on args.
+    """
+    model = VGGNet(
+        config=NET_CONFIG[11],
+        stages_pattern=MODEL_STAGES_PATTERN["VGG"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["VGG11"], use_ssld)
+    return model
+
+
+def VGG13(pretrained=False, use_ssld=False, **kwargs):
+    """
+    VGG13
+    Args:
+        pretrained: bool=False or str. If True, load the pretrained parameters; if False, train from scratch.
+                    If str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. A specific `VGG13` model depending on args.
+    """
+    model = VGGNet(
+        config=NET_CONFIG[13],
+        stages_pattern=MODEL_STAGES_PATTERN["VGG"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["VGG13"], use_ssld)
+    return model
+
+
+def VGG16(pretrained=False, use_ssld=False, **kwargs):
+    """
+    VGG16
+    Args:
+        pretrained: bool=False or str. If True, load the pretrained parameters; if False, train from scratch.
+                    If str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. A specific `VGG16` model depending on args.
+    """
+    model = VGGNet(
+        config=NET_CONFIG[16],
+        stages_pattern=MODEL_STAGES_PATTERN["VGG"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["VGG16"], use_ssld)
+    return model
+
+
+def VGG19(pretrained=False, use_ssld=False, **kwargs):
+    """
+    VGG19
+    Args:
+        pretrained: bool=False or str. If True, load the pretrained parameters; if False, train from scratch.
+                    If str, it is the path of the pretrained model.
+        use_ssld: bool=False. Whether to use the distillation-pretrained model when pretrained=True.
+    Returns:
+        model: nn.Layer. A specific `VGG19` model depending on args.
+    """
+    model = VGGNet(
+        config=NET_CONFIG[19],
+        stages_pattern=MODEL_STAGES_PATTERN["VGG"],
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["VGG19"], use_ssld)
+    return model
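A minimal smoke test for the VGG entrypoints above — a sketch, assuming `paddle` is installed and the vendored package is importable under the same `ppcls` root the file's own imports use (the exact import path mirrors the file location and is otherwise an assumption). The hard-coded `7 * 7 * 512` in `VGGNet.fc1` pins the expected input resolution to 224x224:

    import paddle
    from ppcls.arch.backbone.legendary_models.vgg import VGG16  # assumed path

    # stop_grad_layers=2 freezes conv_block_1 and conv_block_2
    # (see the trainable=False loop in VGGNet.__init__)
    model = VGG16(pretrained=False, class_num=1000, stop_grad_layers=2)
    x = paddle.randn([1, 3, 224, 224])  # 224 / 2**5 = 7 after the five max-pools
    print(model(x).shape)               # [1, 1000]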

+ 0 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/__init__.py


+ 168 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/alexnet.py

@@ -0,0 +1,168 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "AlexNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvPoolLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride,
+                 padding,
+                 stdv,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvPoolLayer, self).__init__()
+
+        self.relu = ReLU() if act == "relu" else None
+
+        self._conv = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(
+                name=name + "_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name=name + "_offset", initializer=Uniform(-stdv, stdv)))
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        if self.relu is not None:
+            x = self.relu(x)
+        x = self._pool(x)
+        return x
+
+
+class AlexNetDY(nn.Layer):
+    def __init__(self, class_num=1000):
+        super(AlexNetDY, self).__init__()
+
+        stdv = 1.0 / math.sqrt(3 * 11 * 11)
+        self._conv1 = ConvPoolLayer(
+            3, 64, 11, 4, 2, stdv, act="relu", name="conv1")
+        stdv = 1.0 / math.sqrt(64 * 5 * 5)
+        self._conv2 = ConvPoolLayer(
+            64, 192, 5, 1, 2, stdv, act="relu", name="conv2")
+        stdv = 1.0 / math.sqrt(192 * 3 * 3)
+        self._conv3 = Conv2D(
+            192,
+            384,
+            3,
+            stride=1,
+            padding=1,
+            weight_attr=ParamAttr(
+                name="conv3_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name="conv3_offset", initializer=Uniform(-stdv, stdv)))
+        stdv = 1.0 / math.sqrt(384 * 3 * 3)
+        self._conv4 = Conv2D(
+            384,
+            256,
+            3,
+            stride=1,
+            padding=1,
+            weight_attr=ParamAttr(
+                name="conv4_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name="conv4_offset", initializer=Uniform(-stdv, stdv)))
+        stdv = 1.0 / math.sqrt(256 * 3 * 3)
+        self._conv5 = ConvPoolLayer(
+            256, 256, 3, 1, 1, stdv, act="relu", name="conv5")
+        stdv = 1.0 / math.sqrt(256 * 6 * 6)
+
+        self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
+        self._fc6 = Linear(
+            in_features=256 * 6 * 6,
+            out_features=4096,
+            weight_attr=ParamAttr(
+                name="fc6_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name="fc6_offset", initializer=Uniform(-stdv, stdv)))
+
+        self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
+        self._fc7 = Linear(
+            in_features=4096,
+            out_features=4096,
+            weight_attr=ParamAttr(
+                name="fc7_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name="fc7_offset", initializer=Uniform(-stdv, stdv)))
+        self._fc8 = Linear(
+            in_features=4096,
+            out_features=class_num,
+            weight_attr=ParamAttr(
+                name="fc8_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(
+                name="fc8_offset", initializer=Uniform(-stdv, stdv)))
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+        x = self._conv3(x)
+        x = F.relu(x)
+        x = self._conv4(x)
+        x = F.relu(x)
+        x = self._conv5(x)
+        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+        x = self._drop1(x)
+        x = self._fc6(x)
+        x = F.relu(x)
+        x = self._drop2(x)
+        x = self._fc7(x)
+        x = F.relu(x)
+        x = self._fc8(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def AlexNet(pretrained=False, use_ssld=False, **kwargs):
+    model = AlexNetDY(**kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
+    return model
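A similar hedged sketch for AlexNet (import path assumed from the file location). The `256 * 6 * 6` input size of `_fc6` likewise fixes the expected input at 224x224:

    import paddle
    from ppcls.arch.backbone.model_zoo.alexnet import AlexNet  # assumed path

    model = AlexNet(class_num=10)       # class_num is forwarded to AlexNetDY
    x = paddle.randn([2, 3, 224, 224])  # 224x224 shrinks to 6x6x256 before _fc6
    print(model(x).shape)               # [2, 10]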

+ 376 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/cspnet.py

@@ -0,0 +1,376 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was heavily based on https://github.com/rwightman/pytorch-image-models
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "CSPDarkNet53":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams"
+}
+
+MODEL_CFGS = {
+    "CSPDarkNet53": dict(
+        stem=dict(
+            out_chs=32, kernel_size=3, stride=1, pool=''),
+        stage=dict(
+            out_chs=(64, 128, 256, 512, 1024),
+            depth=(1, 2, 8, 8, 4),
+            stride=(2, ) * 5,
+            exp_ratio=(2., ) + (1., ) * 4,
+            bottle_ratio=(0.5, ) + (1.0, ) * 4,
+            block_ratio=(1., ) + (0.5, ) * 4,
+            down_growth=True, ))
+}
+
+__all__ = ['CSPDarkNet53']  # model_registry will add each entrypoint fn to this
+
+
+class ConvBnAct(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 kernel_size=1,
+                 stride=1,
+                 padding=None,
+                 dilation=1,
+                 groups=1,
+                 act_layer=nn.LeakyReLU,
+                 norm_layer=nn.BatchNorm2D):
+        super().__init__()
+        if padding is None:
+            padding = (kernel_size - 1) // 2
+        self.conv = nn.Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=ParamAttr(),
+            bias_attr=False)
+
+        self.bn = norm_layer(num_features=output_channels)
+        self.act = act_layer() if act_layer is not None else None
+
+    def forward(self, inputs):
+        x = self.conv(inputs)
+        x = self.bn(x)
+        if self.act is not None:
+            x = self.act(x)
+        return x
+
+
+def create_stem(in_chans=3,
+                out_chs=32,
+                kernel_size=3,
+                stride=2,
+                pool='',
+                act_layer=None,
+                norm_layer=None):
+    stem = nn.Sequential()
+    if not isinstance(out_chs, (tuple, list)):
+        out_chs = [out_chs]
+    assert len(out_chs)
+    in_c = in_chans
+    for i, out_c in enumerate(out_chs):
+        conv_name = f'conv{i + 1}'
+        stem.add_sublayer(
+            conv_name,
+            ConvBnAct(
+                in_c,
+                out_c,
+                kernel_size,
+                stride=stride if i == 0 else 1,
+                act_layer=act_layer,
+                norm_layer=norm_layer))
+        in_c = out_c
+        last_conv = conv_name
+    if pool:
+        stem.add_sublayer(
+            'pool', nn.MaxPool2D(
+                kernel_size=3, stride=2, padding=1))
+    return stem, dict(
+        num_chs=in_c, reduction=stride, module='.'.join(['stem', last_conv]))
+
+
+class DarkBlock(nn.Layer):
+    def __init__(self,
+                 in_chs,
+                 out_chs,
+                 dilation=1,
+                 bottle_ratio=0.5,
+                 groups=1,
+                 act_layer=nn.ReLU,
+                 norm_layer=nn.BatchNorm2D,
+                 attn_layer=None,
+                 drop_block=None):
+        super(DarkBlock, self).__init__()
+        mid_chs = int(round(out_chs * bottle_ratio))
+        ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
+        self.conv1 = ConvBnAct(in_chs, mid_chs, kernel_size=1, **ckwargs)
+        self.conv2 = ConvBnAct(
+            mid_chs,
+            out_chs,
+            kernel_size=3,
+            dilation=dilation,
+            groups=groups,
+            **ckwargs)
+
+    def forward(self, x):
+        shortcut = x
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = x + shortcut
+        return x
+
+
+class CrossStage(nn.Layer):
+    def __init__(self,
+                 in_chs,
+                 out_chs,
+                 stride,
+                 dilation,
+                 depth,
+                 block_ratio=1.,
+                 bottle_ratio=1.,
+                 exp_ratio=1.,
+                 groups=1,
+                 first_dilation=None,
+                 down_growth=False,
+                 cross_linear=False,
+                 block_dpr=None,
+                 block_fn=DarkBlock,
+                 **block_kwargs):
+        super(CrossStage, self).__init__()
+        first_dilation = first_dilation or dilation
+        down_chs = out_chs if down_growth else in_chs
+        exp_chs = int(round(out_chs * exp_ratio))
+        block_out_chs = int(round(out_chs * block_ratio))
+        conv_kwargs = dict(
+            act_layer=block_kwargs.get('act_layer'),
+            norm_layer=block_kwargs.get('norm_layer'))
+
+        if stride != 1 or first_dilation != dilation:
+            self.conv_down = ConvBnAct(
+                in_chs,
+                down_chs,
+                kernel_size=3,
+                stride=stride,
+                dilation=first_dilation,
+                groups=groups,
+                **conv_kwargs)
+            prev_chs = down_chs
+        else:
+            self.conv_down = None
+            prev_chs = in_chs
+
+        self.conv_exp = ConvBnAct(
+            prev_chs, exp_chs, kernel_size=1, **conv_kwargs)
+        prev_chs = exp_chs // 2  # output of conv_exp is always split in two
+
+        self.blocks = nn.Sequential()
+        for i in range(depth):
+            self.blocks.add_sublayer(
+                str(i),
+                block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
+                         groups, **block_kwargs))
+            prev_chs = block_out_chs
+
+        # transition convs
+        self.conv_transition_b = ConvBnAct(
+            prev_chs, exp_chs // 2, kernel_size=1, **conv_kwargs)
+        self.conv_transition = ConvBnAct(
+            exp_chs, out_chs, kernel_size=1, **conv_kwargs)
+
+    def forward(self, x):
+        if self.conv_down is not None:
+            x = self.conv_down(x)
+        x = self.conv_exp(x)
+        split = x.shape[1] // 2
+        xs, xb = x[:, :split], x[:, split:]
+        xb = self.blocks(xb)
+        xb = self.conv_transition_b(xb)
+        out = self.conv_transition(paddle.concat([xs, xb], axis=1))
+        return out
+
+
+class DarkStage(nn.Layer):
+    def __init__(self,
+                 in_chs,
+                 out_chs,
+                 stride,
+                 dilation,
+                 depth,
+                 block_ratio=1.,
+                 bottle_ratio=1.,
+                 groups=1,
+                 first_dilation=None,
+                 block_fn=DarkBlock,
+                 block_dpr=None,
+                 **block_kwargs):
+        super().__init__()
+        first_dilation = first_dilation or dilation
+
+        self.conv_down = ConvBnAct(
+            in_chs,
+            out_chs,
+            kernel_size=3,
+            stride=stride,
+            dilation=first_dilation,
+            groups=groups,
+            act_layer=block_kwargs.get('act_layer'),
+            norm_layer=block_kwargs.get('norm_layer'))
+
+        prev_chs = out_chs
+        block_out_chs = int(round(out_chs * block_ratio))
+        self.blocks = nn.Sequential()
+        for i in range(depth):
+            self.blocks.add_sublayer(
+                str(i),
+                block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
+                         groups, **block_kwargs))
+            prev_chs = block_out_chs
+
+    def forward(self, x):
+        x = self.conv_down(x)
+        x = self.blocks(x)
+        return x
+
+
+def _cfg_to_stage_args(cfg, curr_stride=2, output_stride=32):
+    # get per stage args for stage and containing blocks, calculate strides to meet target output_stride
+    num_stages = len(cfg['depth'])
+    if 'groups' not in cfg:
+        cfg['groups'] = (1, ) * num_stages
+    if 'down_growth' in cfg and not isinstance(cfg['down_growth'],
+                                               (list, tuple)):
+        cfg['down_growth'] = (cfg['down_growth'], ) * num_stages
+    stage_strides = []
+    stage_dilations = []
+    stage_first_dilations = []
+    dilation = 1
+    for cfg_stride in cfg['stride']:
+        stage_first_dilations.append(dilation)
+        if curr_stride >= output_stride:
+            dilation *= cfg_stride
+            stride = 1
+        else:
+            stride = cfg_stride
+            curr_stride *= stride
+        stage_strides.append(stride)
+        stage_dilations.append(dilation)
+    cfg['stride'] = stage_strides
+    cfg['dilation'] = stage_dilations
+    cfg['first_dilation'] = stage_first_dilations
+    stage_args = [
+        dict(zip(cfg.keys(), values)) for values in zip(*cfg.values())
+    ]
+    return stage_args
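A hand-derived trace of `_cfg_to_stage_args` on the CSPDarkNet53 config above, to make the stride/dilation bookkeeping concrete (values worked out from the code, not from a run):

    # curr_stride starts at 1 (stride-1 stem, no stem pool); output_stride=32:
    #   stride   -> [2, 2, 2, 2, 2]   (curr_stride grows 2, 4, 8, 16, 32)
    #   dilation -> [1, 1, 1, 1, 1]
    # With output_stride=8, curr_stride stops growing once it reaches 8, so the
    # last two stages trade stride for dilation:
    #   stride   -> [2, 2, 2, 1, 1]
    #   dilation -> [1, 1, 1, 2, 4]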
+
+
+class CSPNet(nn.Layer):
+    def __init__(self,
+                 cfg,
+                 in_chans=3,
+                 class_num=1000,
+                 output_stride=32,
+                 global_pool='avg',
+                 drop_rate=0.,
+                 act_layer=nn.LeakyReLU,
+                 norm_layer=nn.BatchNorm2D,
+                 zero_init_last_bn=True,
+                 stage_fn=CrossStage,
+                 block_fn=DarkBlock):
+        super().__init__()
+        self.class_num = class_num
+        self.drop_rate = drop_rate
+        assert output_stride in (8, 16, 32)
+        layer_args = dict(act_layer=act_layer, norm_layer=norm_layer)
+
+        # Construct the stem
+        self.stem, stem_feat_info = create_stem(in_chans, **cfg['stem'],
+                                                **layer_args)
+        self.feature_info = [stem_feat_info]
+        prev_chs = stem_feat_info['num_chs']
+        curr_stride = stem_feat_info[
+            'reduction']  # reduction does not include pool
+        if cfg['stem']['pool']:
+            curr_stride *= 2
+
+        # Construct the stages
+        per_stage_args = _cfg_to_stage_args(
+            cfg['stage'], curr_stride=curr_stride, output_stride=output_stride)
+        self.stages = nn.LayerList()
+        for i, sa in enumerate(per_stage_args):
+            self.stages.add_sublayer(
+                str(i),
+                stage_fn(
+                    prev_chs, **sa, **layer_args, block_fn=block_fn))
+            prev_chs = sa['out_chs']
+            curr_stride *= sa['stride']
+            self.feature_info += [
+                dict(
+                    num_chs=prev_chs,
+                    reduction=curr_stride,
+                    module=f'stages.{i}')
+            ]
+
+        # Construct the head
+        self.num_features = prev_chs
+
+        self.pool = nn.AdaptiveAvgPool2D(1)
+        self.flatten = nn.Flatten(1)
+        self.fc = nn.Linear(
+            prev_chs,
+            class_num,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+
+    def forward(self, x):
+        x = self.stem(x)
+        for stage in self.stages:
+            x = stage(x)
+        x = self.pool(x)
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def CSPDarkNet53(pretrained=False, use_ssld=False, **kwargs):
+    model = CSPNet(MODEL_CFGS["CSPDarkNet53"], block_fn=DarkBlock, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["CSPDarkNet53"], use_ssld=use_ssld)
+    return model
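Instantiation sketch, under the same import-path assumption as above. Because the head pools with `AdaptiveAvgPool2D(1)`, the input resolution is flexible:

    import paddle
    from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53  # assumed path

    model = CSPDarkNet53(pretrained=False)
    x = paddle.randn([1, 3, 256, 256])
    print(model(x).shape)  # [1, 1000]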

+ 197 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/darknet.py

@@ -0,0 +1,197 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "DarkNet53":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride,
+                 padding,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            weight_attr=ParamAttr(name=name + ".conv.weights"),
+            bias_attr=False)
+
+        bn_name = name + ".bn"
+        self._bn = BatchNorm(
+            num_channels=output_channels,
+            act="relu",
+            param_attr=ParamAttr(name=bn_name + ".scale"),
+            bias_attr=ParamAttr(name=bn_name + ".offset"),
+            moving_mean_name=bn_name + ".mean",
+            moving_variance_name=bn_name + ".var")
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = self._bn(x)
+        return x
+
+
+class BasicBlock(nn.Layer):
+    def __init__(self, input_channels, output_channels, name=None):
+        super(BasicBlock, self).__init__()
+
+        self._conv1 = ConvBNLayer(
+            input_channels, output_channels, 1, 1, 0, name=name + ".0")
+        self._conv2 = ConvBNLayer(
+            output_channels, output_channels * 2, 3, 1, 1, name=name + ".1")
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+        return paddle.add(x=inputs, y=x)
+
+
+class DarkNet(nn.Layer):
+    def __init__(self, class_num=1000):
+        super(DarkNet, self).__init__()
+
+        self.stages = [1, 2, 8, 8, 4]
+        self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input")
+        self._conv2 = ConvBNLayer(
+            32, 64, 3, 2, 1, name="yolo_input.downsample")
+
+        self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0")
+        self._downsample_0 = ConvBNLayer(
+            64, 128, 3, 2, 1, name="stage.0.downsample")
+
+        self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0")
+        self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1")
+        self._downsample_1 = ConvBNLayer(
+            128, 256, 3, 2, 1, name="stage.1.downsample")
+
+        self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0")
+        self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1")
+        self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2")
+        self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3")
+        self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4")
+        self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5")
+        self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6")
+        self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7")
+        self._downsample_2 = ConvBNLayer(
+            256, 512, 3, 2, 1, name="stage.2.downsample")
+
+        self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0")
+        self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1")
+        self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2")
+        self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3")
+        self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4")
+        self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5")
+        self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6")
+        self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7")
+        self._downsample_3 = ConvBNLayer(
+            512, 1024, 3, 2, 1, name="stage.3.downsample")
+
+        self._basic_block_41 = BasicBlock(1024, 512, name="stage.4.0")
+        self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1")
+        self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2")
+        self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3")
+
+        self._pool = AdaptiveAvgPool2D(1)
+
+        stdv = 1.0 / math.sqrt(1024.0)
+        self._out = Linear(
+            1024,
+            class_num,
+            weight_attr=ParamAttr(
+                name="fc_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+
+        x = self._basic_block_01(x)
+        x = self._downsample_0(x)
+
+        x = self._basic_block_11(x)
+        x = self._basic_block_12(x)
+        x = self._downsample_1(x)
+
+        x = self._basic_block_21(x)
+        x = self._basic_block_22(x)
+        x = self._basic_block_23(x)
+        x = self._basic_block_24(x)
+        x = self._basic_block_25(x)
+        x = self._basic_block_26(x)
+        x = self._basic_block_27(x)
+        x = self._basic_block_28(x)
+        x = self._downsample_2(x)
+
+        x = self._basic_block_31(x)
+        x = self._basic_block_32(x)
+        x = self._basic_block_33(x)
+        x = self._basic_block_34(x)
+        x = self._basic_block_35(x)
+        x = self._basic_block_36(x)
+        x = self._basic_block_37(x)
+        x = self._basic_block_38(x)
+        x = self._downsample_3(x)
+
+        x = self._basic_block_41(x)
+        x = self._basic_block_42(x)
+        x = self._basic_block_43(x)
+        x = self._basic_block_44(x)
+
+        x = self._pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self._out(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
+    model = DarkNet(**kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
+    return model
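For reference, the "53" can be counted straight off the definition above: 2 stem convs + 2 convs in each of the 1+2+8+8+4 = 23 BasicBlocks + 4 downsample convs + the final Linear = 2 + 46 + 4 + 1 = 53 weighted layers. A hedged usage sketch:

    import paddle
    from ppcls.arch.backbone.model_zoo.darknet import DarkNet53  # assumed path

    model = DarkNet53(pretrained=False)
    x = paddle.randn([1, 3, 224, 224])
    print(model(x).shape)  # [1, 1000]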

+ 344 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/densenet.py

@@ -0,0 +1,344 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "DenseNet121":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
+    "DenseNet161":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
+    "DenseNet169":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
+    "DenseNet201":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
+    "DenseNet264":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class BNACConvLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 pad=0,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(BNACConvLayer, self).__init__()
+
+        self._batch_norm = BatchNorm(
+            num_channels,
+            act=act,
+            param_attr=ParamAttr(name=name + '_bn_scale'),
+            bias_attr=ParamAttr(name=name + '_bn_offset'),
+            moving_mean_name=name + '_bn_mean',
+            moving_variance_name=name + '_bn_variance')
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=pad,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+
+    def forward(self, input):
+        y = self._batch_norm(input)
+        y = self._conv(y)
+        return y
+
+
+class DenseLayer(nn.Layer):
+    def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None):
+        super(DenseLayer, self).__init__()
+        self.dropout = dropout
+
+        self.bn_ac_func1 = BNACConvLayer(
+            num_channels=num_channels,
+            num_filters=bn_size * growth_rate,
+            filter_size=1,
+            pad=0,
+            stride=1,
+            name=name + "_x1")
+
+        self.bn_ac_func2 = BNACConvLayer(
+            num_channels=bn_size * growth_rate,
+            num_filters=growth_rate,
+            filter_size=3,
+            pad=1,
+            stride=1,
+            name=name + "_x2")
+
+        if dropout:
+            self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer")
+
+    def forward(self, input):
+        conv = self.bn_ac_func1(input)
+        conv = self.bn_ac_func2(conv)
+        if self.dropout:
+            conv = self.dropout_func(conv)
+        conv = paddle.concat([input, conv], axis=1)
+        return conv
+
+
+class DenseBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_layers,
+                 bn_size,
+                 growth_rate,
+                 dropout,
+                 name=None):
+        super(DenseBlock, self).__init__()
+        self.dropout = dropout
+
+        self.dense_layer_func = []
+
+        pre_channel = num_channels
+        for layer in range(num_layers):
+            self.dense_layer_func.append(
+                self.add_sublayer(
+                    "{}_{}".format(name, layer + 1),
+                    DenseLayer(
+                        num_channels=pre_channel,
+                        growth_rate=growth_rate,
+                        bn_size=bn_size,
+                        dropout=dropout,
+                        name=name + '_' + str(layer + 1))))
+            pre_channel = pre_channel + growth_rate
+
+    def forward(self, input):
+        conv = input
+        for func in self.dense_layer_func:
+            conv = func(conv)
+        return conv
+
+
+class TransitionLayer(nn.Layer):
+    def __init__(self, num_channels, num_output_features, name=None):
+        super(TransitionLayer, self).__init__()
+
+        self.conv_ac_func = BNACConvLayer(
+            num_channels=num_channels,
+            num_filters=num_output_features,
+            filter_size=1,
+            pad=0,
+            stride=1,
+            name=name)
+
+        self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0)
+
+    def forward(self, input):
+        y = self.conv_ac_func(input)
+        y = self.pool2d_avg(y)
+        return y
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 pad=0,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=pad,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=name + '_bn_scale'),
+            bias_attr=ParamAttr(name=name + '_bn_offset'),
+            moving_mean_name=name + '_bn_mean',
+            moving_variance_name=name + '_bn_variance')
+
+    def forward(self, input):
+        y = self._conv(input)
+        y = self._batch_norm(y)
+        return y
+
+
+class DenseNet(nn.Layer):
+    def __init__(self, layers=121, bn_size=4, dropout=0, class_num=1000):
+        super(DenseNet, self).__init__()
+
+        supported_layers = [121, 161, 169, 201, 264]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+        densenet_spec = {
+            121: (64, 32, [6, 12, 24, 16]),
+            161: (96, 48, [6, 12, 36, 24]),
+            169: (64, 32, [6, 12, 32, 32]),
+            201: (64, 32, [6, 12, 48, 32]),
+            264: (64, 32, [6, 12, 64, 48])
+        }
+        num_init_features, growth_rate, block_config = densenet_spec[layers]
+
+        self.conv1_func = ConvBNLayer(
+            num_channels=3,
+            num_filters=num_init_features,
+            filter_size=7,
+            stride=2,
+            pad=3,
+            act='relu',
+            name="conv1")
+
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_config = block_config
+
+        self.dense_block_func_list = []
+        self.transition_func_list = []
+        pre_num_channels = num_init_features
+        num_features = num_init_features
+        for i, num_layers in enumerate(block_config):
+            self.dense_block_func_list.append(
+                self.add_sublayer(
+                    "db_conv_{}".format(i + 2),
+                    DenseBlock(
+                        num_channels=pre_num_channels,
+                        num_layers=num_layers,
+                        bn_size=bn_size,
+                        growth_rate=growth_rate,
+                        dropout=dropout,
+                        name='conv' + str(i + 2))))
+
+            num_features = num_features + num_layers * growth_rate
+            pre_num_channels = num_features
+
+            if i != len(block_config) - 1:
+                self.transition_func_list.append(
+                    self.add_sublayer(
+                        "tr_conv{}_blk".format(i + 2),
+                        TransitionLayer(
+                            num_channels=pre_num_channels,
+                            num_output_features=num_features // 2,
+                            name='conv' + str(i + 2) + "_blk")))
+                pre_num_channels = num_features // 2
+                num_features = num_features // 2
+
+        self.batch_norm = BatchNorm(
+            num_features,
+            act="relu",
+            param_attr=ParamAttr(name='conv5_blk_bn_scale'),
+            bias_attr=ParamAttr(name='conv5_blk_bn_offset'),
+            moving_mean_name='conv5_blk_bn_mean',
+            moving_variance_name='conv5_blk_bn_variance')
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        stdv = 1.0 / math.sqrt(num_features * 1.0)
+
+        self.out = Linear(
+            num_features,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, input):
+        conv = self.conv1_func(input)
+        conv = self.pool2d_max(conv)
+
+        for i, num_layers in enumerate(self.block_config):
+            conv = self.dense_block_func_list[i](conv)
+            if i != len(self.block_config) - 1:
+                conv = self.transition_func_list[i](conv)
+
+        conv = self.batch_norm(conv)
+        y = self.pool2d_avg(conv)
+        y = paddle.flatten(y, start_axis=1, stop_axis=-1)
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
+    model = DenseNet(layers=121, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
+    return model
+
+
+def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
+    model = DenseNet(layers=161, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
+    return model
+
+
+def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
+    model = DenseNet(layers=169, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
+    return model
+
+
+def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
+    model = DenseNet(layers=201, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
+    return model
+
+
+def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
+    model = DenseNet(layers=264, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
+    return model
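The channel bookkeeping above, traced for DenseNet121 (num_init_features=64, growth_rate=32, block_config=[6, 12, 24, 16]): 64 + 6*32 = 256, halved to 128 by the transition; 128 + 12*32 = 512 -> 256; 256 + 24*32 = 1024 -> 512; 512 + 16*32 = 1024, which is the `num_features` feeding the final BatchNorm and classifier. A hedged sketch:

    import paddle
    from ppcls.arch.backbone.model_zoo.densenet import DenseNet121  # assumed path

    model = DenseNet121(pretrained=False)
    x = paddle.randn([1, 3, 224, 224])
    print(model(x).shape)  # [1, 1000]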

+ 272 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py

@@ -0,0 +1,272 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was heavily based on https://github.com/facebookresearch/deit
+
+import paddle
+import paddle.nn as nn
+from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "DeiT_tiny_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
+    "DeiT_small_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
+    "DeiT_tiny_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_small_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
+    "DeiT_base_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
+    "DeiT_base_distilled_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class DistilledVisionTransformer(VisionTransformer):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 class_num=1000,
+                 embed_dim=768,
+                 depth=12,
+                 num_heads=12,
+                 mlp_ratio=4,
+                 qkv_bias=False,
+                 norm_layer='nn.LayerNorm',
+                 epsilon=1e-5,
+                 **kwargs):
+        super().__init__(
+            img_size=img_size,
+            patch_size=patch_size,
+            class_num=class_num,
+            embed_dim=embed_dim,
+            depth=depth,
+            num_heads=num_heads,
+            mlp_ratio=mlp_ratio,
+            qkv_bias=qkv_bias,
+            norm_layer=norm_layer,
+            epsilon=epsilon,
+            **kwargs)
+        self.pos_embed = self.create_parameter(
+            shape=(1, self.patch_embed.num_patches + 2, self.embed_dim),
+            default_initializer=zeros_)
+        self.add_parameter("pos_embed", self.pos_embed)
+
+        self.dist_token = self.create_parameter(
+            shape=(1, 1, self.embed_dim), default_initializer=zeros_)
+        self.add_parameter("cls_token", self.cls_token)
+
+        self.head_dist = nn.Linear(
+            self.embed_dim,
+            self.class_num) if self.class_num > 0 else Identity()
+
+        trunc_normal_(self.dist_token)
+        trunc_normal_(self.pos_embed)
+        self.head_dist.apply(self._init_weights)
+
+    def forward_features(self, x):
+        B = paddle.shape(x)[0]
+        x = self.patch_embed(x)
+
+        cls_tokens = self.cls_token.expand((B, -1, -1))
+        dist_token = self.dist_token.expand((B, -1, -1))
+        x = paddle.concat((cls_tokens, dist_token, x), axis=1)
+
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+
+        for blk in self.blocks:
+            x = blk(x)
+
+        x = self.norm(x)
+        return x[:, 0], x[:, 1]
+
+    def forward(self, x):
+        x, x_dist = self.forward_features(x)
+        x = self.head(x)
+        x_dist = self.head_dist(x_dist)
+        return (x + x_dist) / 2
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=192,
+        depth=12,
+        num_heads=3,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=384,
+        depth=12,
+        num_heads=6,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
+    model = DistilledVisionTransformer(
+        patch_size=16,
+        embed_dim=192,
+        depth=12,
+        num_heads=3,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_tiny_distilled_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_small_distilled_patch16_224(pretrained=False,
+                                     use_ssld=False,
+                                     **kwargs):
+    model = DistilledVisionTransformer(
+        patch_size=16,
+        embed_dim=384,
+        depth=12,
+        num_heads=6,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_small_distilled_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False,
+                                    **kwargs):
+    model = DistilledVisionTransformer(
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        img_size=384,
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_patch16_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False,
+                                    **kwargs):
+    model = DistilledVisionTransformer(
+        img_size=384,
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["DeiT_base_distilled_patch16_384"],
+        use_ssld=use_ssld)
+    return model
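At inference the distilled variants average the two heads (`(x + x_dist) / 2` in `DistilledVisionTransformer.forward`), so every entrypoint above behaves as a plain classifier. Sketch, same import-path assumption:

    import paddle
    from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import (
        DeiT_tiny_distilled_patch16_224)  # assumed path

    model = DeiT_tiny_distilled_patch16_224(pretrained=False)
    x = paddle.randn([1, 3, 224, 224])
    print(model(x).shape)  # [1, 1000]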

+ 528 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/dla.py

@@ -0,0 +1,528 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/ucbdrive/dla
+
+import math
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddle.nn.initializer import Normal, Constant
+
+from ppcls.arch.backbone.base.theseus_layer import Identity
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "DLA34":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
+    "DLA46_c":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams",
+    "DLA46x_c":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams",
+    "DLA60":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams",
+    "DLA60x":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams",
+    "DLA60x_c":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams",
+    "DLA102":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams",
+    "DLA102x":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams",
+    "DLA102x2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams",
+    "DLA169":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+zeros_ = Constant(value=0.)
+ones_ = Constant(value=1.)
+
+
+class DlaBasic(nn.Layer):
+    def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
+        super(DlaBasic, self).__init__()
+        self.conv1 = nn.Conv2D(
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
+        self.bn1 = nn.BatchNorm2D(planes)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv2D(
+            planes,
+            planes,
+            kernel_size=3,
+            stride=1,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation)
+        self.bn2 = nn.BatchNorm2D(planes)
+        self.stride = stride
+
+    def forward(self, x, residual=None):
+        if residual is None:
+            residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class DlaBottleneck(nn.Layer):
+    expansion = 2
+
+    def __init__(self,
+                 inplanes,
+                 outplanes,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64):
+        super(DlaBottleneck, self).__init__()
+        self.stride = stride
+        mid_planes = int(
+            math.floor(outplanes * (base_width / 64)) * cardinality)
+        mid_planes = mid_planes // self.expansion
+
+        self.conv1 = nn.Conv2D(
+            inplanes, mid_planes, kernel_size=1, bias_attr=False)
+        self.bn1 = nn.BatchNorm2D(mid_planes)
+        self.conv2 = nn.Conv2D(
+            mid_planes,
+            mid_planes,
+            kernel_size=3,
+            stride=stride,
+            padding=dilation,
+            bias_attr=False,
+            dilation=dilation,
+            groups=cardinality)
+        self.bn2 = nn.BatchNorm2D(mid_planes)
+        self.conv3 = nn.Conv2D(
+            mid_planes, outplanes, kernel_size=1, bias_attr=False)
+        self.bn3 = nn.BatchNorm2D(outplanes)
+        self.relu = nn.ReLU()
+
+    def forward(self, x, residual=None):
+        if residual is None:
+            residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class DlaRoot(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, residual):
+        super(DlaRoot, self).__init__()
+        self.conv = nn.Conv2D(
+            in_channels,
+            out_channels,
+            1,
+            stride=1,
+            bias_attr=False,
+            padding=(kernel_size - 1) // 2)
+        self.bn = nn.BatchNorm2D(out_channels)
+        self.relu = nn.ReLU()
+        self.residual = residual
+
+    def forward(self, *x):
+        children = x
+        x = self.conv(paddle.concat(x, 1))
+        x = self.bn(x)
+        if self.residual:
+            x += children[0]
+        x = self.relu(x)
+
+        return x
+
+
+class DlaTree(nn.Layer):
+    def __init__(self,
+                 levels,
+                 block,
+                 in_channels,
+                 out_channels,
+                 stride=1,
+                 dilation=1,
+                 cardinality=1,
+                 base_width=64,
+                 level_root=False,
+                 root_dim=0,
+                 root_kernel_size=1,
+                 root_residual=False):
+        super(DlaTree, self).__init__()
+        if root_dim == 0:
+            root_dim = 2 * out_channels
+        if level_root:
+            root_dim += in_channels
+
+        self.downsample = nn.MaxPool2D(
+            stride, stride=stride) if stride > 1 else Identity()
+        self.project = Identity()
+        cargs = dict(
+            dilation=dilation, cardinality=cardinality, base_width=base_width)
+
+        if levels == 1:
+            self.tree1 = block(in_channels, out_channels, stride, **cargs)
+            self.tree2 = block(out_channels, out_channels, 1, **cargs)
+            if in_channels != out_channels:
+                self.project = nn.Sequential(
+                    nn.Conv2D(
+                        in_channels,
+                        out_channels,
+                        kernel_size=1,
+                        stride=1,
+                        bias_attr=False),
+                    nn.BatchNorm2D(out_channels))
+        else:
+            cargs.update(
+                dict(
+                    root_kernel_size=root_kernel_size,
+                    root_residual=root_residual))
+            self.tree1 = DlaTree(
+                levels - 1,
+                block,
+                in_channels,
+                out_channels,
+                stride,
+                root_dim=0,
+                **cargs)
+            self.tree2 = DlaTree(
+                levels - 1,
+                block,
+                out_channels,
+                out_channels,
+                root_dim=root_dim + out_channels,
+                **cargs)
+
+        if levels == 1:
+            self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
+                                root_residual)
+
+        self.level_root = level_root
+        self.root_dim = root_dim
+        self.levels = levels
+
+    def forward(self, x, residual=None, children=None):
+        children = [] if children is None else children
+        bottom = self.downsample(x)
+        residual = self.project(bottom)
+
+        if self.level_root:
+            children.append(bottom)
+        x1 = self.tree1(x, residual)
+
+        if self.levels == 1:
+            x2 = self.tree2(x1)
+            x = self.root(x2, x1, *children)
+        else:
+            children.append(x1)
+            x = self.tree2(x1, children=children)
+        return x
+
+
+class DLA(nn.Layer):
+    def __init__(self,
+                 levels,
+                 channels,
+                 in_chans=3,
+                 cardinality=1,
+                 base_width=64,
+                 block=DlaBottleneck,
+                 residual_root=False,
+                 drop_rate=0.0,
+                 class_num=1000,
+                 with_pool=True):
+        super(DLA, self).__init__()
+        self.channels = channels
+        self.class_num = class_num
+        self.with_pool = with_pool
+        self.cardinality = cardinality
+        self.base_width = base_width
+        self.drop_rate = drop_rate
+
+        self.base_layer = nn.Sequential(
+            nn.Conv2D(
+                in_chans,
+                channels[0],
+                kernel_size=7,
+                stride=1,
+                padding=3,
+                bias_attr=False),
+            nn.BatchNorm2D(channels[0]),
+            nn.ReLU())
+
+        self.level0 = self._make_conv_level(channels[0], channels[0],
+                                            levels[0])
+        self.level1 = self._make_conv_level(
+            channels[0], channels[1], levels[1], stride=2)
+
+        cargs = dict(
+            cardinality=cardinality,
+            base_width=base_width,
+            root_residual=residual_root)
+
+        self.level2 = DlaTree(
+            levels[2],
+            block,
+            channels[1],
+            channels[2],
+            2,
+            level_root=False,
+            **cargs)
+        self.level3 = DlaTree(
+            levels[3],
+            block,
+            channels[2],
+            channels[3],
+            2,
+            level_root=True,
+            **cargs)
+        self.level4 = DlaTree(
+            levels[4],
+            block,
+            channels[3],
+            channels[4],
+            2,
+            level_root=True,
+            **cargs)
+        self.level5 = DlaTree(
+            levels[5],
+            block,
+            channels[4],
+            channels[5],
+            2,
+            level_root=True,
+            **cargs)
+
+        self.feature_info = [
+            # rare to have a meaningful stride 1 level
+            dict(
+                num_chs=channels[0], reduction=1, module='level0'),
+            dict(
+                num_chs=channels[1], reduction=2, module='level1'),
+            dict(
+                num_chs=channels[2], reduction=4, module='level2'),
+            dict(
+                num_chs=channels[3], reduction=8, module='level3'),
+            dict(
+                num_chs=channels[4], reduction=16, module='level4'),
+            dict(
+                num_chs=channels[5], reduction=32, module='level5'),
+        ]
+
+        self.num_features = channels[-1]
+
+        if with_pool:
+            self.global_pool = nn.AdaptiveAvgPool2D(1)
+
+        if class_num > 0:
+            self.fc = nn.Conv2D(self.num_features, class_num, 1)
+
+        for m in self.sublayers():
+            if isinstance(m, nn.Conv2D):
+                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
+                normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
+                normal_(m.weight)
+            elif isinstance(m, nn.BatchNorm2D):
+                ones_(m.weight)
+                zeros_(m.bias)
+
+    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
+        modules = []
+        for i in range(convs):
+            modules.extend([
+                nn.Conv2D(
+                    inplanes,
+                    planes,
+                    kernel_size=3,
+                    stride=stride if i == 0 else 1,
+                    padding=dilation,
+                    bias_attr=False,
+                    dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU()
+            ])
+            inplanes = planes
+        return nn.Sequential(*modules)
+
+    def forward_features(self, x):
+        x = self.base_layer(x)
+
+        x = self.level0(x)
+        x = self.level1(x)
+        x = self.level2(x)
+        x = self.level3(x)
+        x = self.level4(x)
+        x = self.level5(x)
+
+        return x
+
+    def forward(self, x):
+        x = self.forward_features(x)
+
+        if self.with_pool:
+            x = self.global_pool(x)
+
+        if self.drop_rate > 0.:
+            x = F.dropout(x, p=self.drop_rate, training=self.training)
+
+        if self.class_num > 0:
+            x = self.fc(x)
+            x = x.flatten(1)
+
+        return x
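
Because forward_features is exposed separately from the classification head, the backbone above can also serve as a dense feature extractor. A minimal sketch, assuming this module's imports and the DlaBasic block defined earlier in the file (not part of the committed code):

# Hypothetical headless DLA-34-shaped backbone: class_num=0 skips the
# fc layer and with_pool=False keeps the spatial map.
backbone = DLA(levels=(1, 1, 1, 2, 2, 1),
               channels=(16, 32, 64, 128, 256, 512),
               block=DlaBasic, class_num=0, with_pool=False)
feats = backbone.forward_features(paddle.randn([1, 3, 224, 224]))
print(feats.shape)  # stride-32 map from level5: [1, 512, 7, 7]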
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`: expected a bool or a str "
+            "path to the pretrained weights.")
+
+
+def DLA34(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 128, 256, 512),
+                block=DlaBasic,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
+    return model
+
+
+def DLA46_c(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
+    return model
+
+
+def DLA46x_c(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 2, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
+    return model
+
+
+def DLA60(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
+    return model
+
+
+def DLA60x(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
+    return model
+
+
+def DLA60x_c(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 2, 3, 1),
+                channels=(16, 32, 64, 64, 128, 256),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
+    return model
+
+
+def DLA102(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
+    return model
+
+
+def DLA102x(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=32,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
+    return model
+
+
+def DLA102x2(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 1, 3, 4, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                cardinality=64,
+                base_width=4,
+                residual_root=True,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
+    return model
+
+
+def DLA169(pretrained=False, **kwargs):
+    model = DLA(levels=(1, 1, 2, 3, 5, 1),
+                channels=(16, 32, 128, 256, 512, 1024),
+                block=DlaBottleneck,
+                residual_root=True,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
+    return model
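
A quick smoke test of these builders (a sketch with random weights, assuming `import paddle`):

model = DLA34(pretrained=False)
model.eval()
with paddle.no_grad():
    logits = model(paddle.randn([1, 3, 224, 224]))
print(logits.shape)  # [1, 1000] with the default class_num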

+ 451 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/dpn.py

@@ -0,0 +1,451 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import sys
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "DPN68":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
+    "DPN92":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
+    "DPN98":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
+    "DPN107":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
+    "DPN131":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 pad=0,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=pad,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=name + '_bn_scale'),
+            bias_attr=ParamAttr(name + '_bn_offset'),
+            moving_mean_name=name + '_bn_mean',
+            moving_variance_name=name + '_bn_variance')
+
+    def forward(self, input):
+        y = self._conv(input)
+        y = self._batch_norm(y)
+        return y
+
+
+class BNACConvLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 pad=0,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(BNACConvLayer, self).__init__()
+        self.num_channels = num_channels
+
+        self._batch_norm = BatchNorm(
+            num_channels,
+            act=act,
+            param_attr=ParamAttr(name=name + '_bn_scale'),
+            bias_attr=ParamAttr(name + '_bn_offset'),
+            moving_mean_name=name + '_bn_mean',
+            moving_variance_name=name + '_bn_variance')
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=pad,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+
+    def forward(self, input):
+        y = self._batch_norm(input)
+        y = self._conv(y)
+        return y
+
+
+class DualPathFactory(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_1x1_a,
+                 num_3x3_b,
+                 num_1x1_c,
+                 inc,
+                 G,
+                 _type='normal',
+                 name=None):
+        super(DualPathFactory, self).__init__()
+
+        self.num_1x1_c = num_1x1_c
+        self.inc = inc
+        self.name = name
+
+        kw = 3
+        kh = 3
+        pw = (kw - 1) // 2
+        ph = (kh - 1) // 2
+
+        # type
+        if _type == 'proj':
+            key_stride = 1
+            self.has_proj = True
+        elif _type == 'down':
+            key_stride = 2
+            self.has_proj = True
+        elif _type == 'normal':
+            key_stride = 1
+            self.has_proj = False
+        else:
+            print("not implemented now!!!")
+            sys.exit(1)
+
+        data_in_ch = sum(num_channels) if isinstance(num_channels,
+                                                     list) else num_channels
+
+        if self.has_proj:
+            self.c1x1_w_func = BNACConvLayer(
+                num_channels=data_in_ch,
+                num_filters=num_1x1_c + 2 * inc,
+                filter_size=(1, 1),
+                pad=(0, 0),
+                stride=(key_stride, key_stride),
+                name=name + "_match")
+
+        self.c1x1_a_func = BNACConvLayer(
+            num_channels=data_in_ch,
+            num_filters=num_1x1_a,
+            filter_size=(1, 1),
+            pad=(0, 0),
+            name=name + "_conv1")
+
+        self.c3x3_b_func = BNACConvLayer(
+            num_channels=num_1x1_a,
+            num_filters=num_3x3_b,
+            filter_size=(kw, kh),
+            pad=(pw, ph),
+            stride=(key_stride, key_stride),
+            groups=G,
+            name=name + "_conv2")
+
+        self.c1x1_c_func = BNACConvLayer(
+            num_channels=num_3x3_b,
+            num_filters=num_1x1_c + inc,
+            filter_size=(1, 1),
+            pad=(0, 0),
+            name=name + "_conv3")
+
+    def forward(self, input):
+        # PROJ
+        if isinstance(input, list):
+            data_in = paddle.concat([input[0], input[1]], axis=1)
+        else:
+            data_in = input
+
+        if self.has_proj:
+            c1x1_w = self.c1x1_w_func(data_in)
+            data_o1, data_o2 = paddle.split(
+                c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1)
+        else:
+            data_o1 = input[0]
+            data_o2 = input[1]
+
+        c1x1_a = self.c1x1_a_func(data_in)
+        c3x3_b = self.c3x3_b_func(c1x1_a)
+        c1x1_c = self.c1x1_c_func(c3x3_b)
+
+        c1x1_c1, c1x1_c2 = paddle.split(
+            c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1)
+
+        # OUTPUTS
+        summ = paddle.add(x=data_o1, y=c1x1_c1)
+        dense = paddle.concat([data_o2, c1x1_c2], axis=1)
+        # tensor, channels
+        return [summ, dense]
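
The pair returned here is the dual path itself: summ is the fixed-width residual branch (element-wise add), while dense grows by inc channels with every block (concatenation). A numeric sketch with hypothetical sizes, assuming this module's imports:

# 'proj' block: 64 input channels -> a 256-channel residual path plus a
# dense path of 2*inc + inc = 48 channels.
blk = DualPathFactory(num_channels=64, num_1x1_a=96, num_3x3_b=96,
                      num_1x1_c=256, inc=16, G=32, _type='proj',
                      name='demo')
summ, dense = blk(paddle.randn([1, 64, 56, 56]))
print(summ.shape, dense.shape)  # [1, 256, 56, 56] [1, 48, 56, 56]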
+
+
+class DPN(nn.Layer):
+    def __init__(self, layers=68, class_num=1000):
+        super(DPN, self).__init__()
+
+        self._class_num = class_num
+
+        args = self.get_net_args(layers)
+        bws = args['bw']
+        inc_sec = args['inc_sec']
+        rs = args['r']
+        k_r = args['k_r']
+        k_sec = args['k_sec']
+        G = args['G']
+        init_num_filter = args['init_num_filter']
+        init_filter_size = args['init_filter_size']
+        init_padding = args['init_padding']
+
+        self.k_sec = k_sec
+
+        self.conv1_x_1_func = ConvBNLayer(
+            num_channels=3,
+            num_filters=init_num_filter,
+            filter_size=init_filter_size,
+            stride=2,
+            pad=init_padding,
+            act='relu',
+            name="conv1")
+
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        num_channel_dpn = init_num_filter
+
+        self.dpn_func_list = []
+        #conv2 - conv5
+        match_list, num = [], 0
+        for gc in range(4):
+            bw = bws[gc]
+            inc = inc_sec[gc]
+            R = (k_r * bw) // rs[gc]
+            if gc == 0:
+                _type1 = 'proj'
+                _type2 = 'normal'
+                match = 1
+            else:
+                _type1 = 'down'
+                _type2 = 'normal'
+                match = match + k_sec[gc - 1]
+            match_list.append(match)
+            self.dpn_func_list.append(
+                self.add_sublayer(
+                    "dpn{}".format(match),
+                    DualPathFactory(
+                        num_channels=num_channel_dpn,
+                        num_1x1_a=R,
+                        num_3x3_b=R,
+                        num_1x1_c=bw,
+                        inc=inc,
+                        G=G,
+                        _type=_type1,
+                        name="dpn" + str(match))))
+            num_channel_dpn = [bw, 3 * inc]
+
+            for i_ly in range(2, k_sec[gc] + 1):
+                num += 1
+                if num in match_list:
+                    num += 1
+                self.dpn_func_list.append(
+                    self.add_sublayer(
+                        "dpn{}".format(num),
+                        DualPathFactory(
+                            num_channels=num_channel_dpn,
+                            num_1x1_a=R,
+                            num_3x3_b=R,
+                            num_1x1_c=bw,
+                            inc=inc,
+                            G=G,
+                            _type=_type2,
+                            name="dpn" + str(num))))
+
+                num_channel_dpn = [
+                    num_channel_dpn[0], num_channel_dpn[1] + inc
+                ]
+
+        out_channel = sum(num_channel_dpn)
+
+        self.conv5_x_x_bn = BatchNorm(
+            num_channels=sum(num_channel_dpn),
+            act="relu",
+            param_attr=ParamAttr(name='final_concat_bn_scale'),
+            bias_attr=ParamAttr('final_concat_bn_offset'),
+            moving_mean_name='final_concat_bn_mean',
+            moving_variance_name='final_concat_bn_variance')
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        stdv = 0.01
+
+        self.out = Linear(
+            out_channel,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, input):
+        conv1_x_1 = self.conv1_x_1_func(input)
+        convX_x_x = self.pool2d_max(conv1_x_1)
+
+        dpn_idx = 0
+        for gc in range(4):
+            convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
+            dpn_idx += 1
+            for i_ly in range(2, self.k_sec[gc] + 1):
+                convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
+                dpn_idx += 1
+
+        conv5_x_x = paddle.concat(convX_x_x, axis=1)
+        conv5_x_x = self.conv5_x_x_bn(conv5_x_x)
+
+        y = self.pool2d_avg(conv5_x_x)
+        y = paddle.flatten(y, start_axis=1, stop_axis=-1)
+        y = self.out(y)
+        return y
+
+    def get_net_args(self, layers):
+        if layers == 68:
+            k_r = 128
+            G = 32
+            k_sec = [3, 4, 12, 3]
+            inc_sec = [16, 32, 32, 64]
+            bw = [64, 128, 256, 512]
+            r = [64, 64, 64, 64]
+            init_num_filter = 10
+            init_filter_size = 3
+            init_padding = 1
+        elif layers == 92:
+            k_r = 96
+            G = 32
+            k_sec = [3, 4, 20, 3]
+            inc_sec = [16, 32, 24, 128]
+            bw = [256, 512, 1024, 2048]
+            r = [256, 256, 256, 256]
+            init_num_filter = 64
+            init_filter_size = 7
+            init_padding = 3
+        elif layers == 98:
+            k_r = 160
+            G = 40
+            k_sec = [3, 6, 20, 3]
+            inc_sec = [16, 32, 32, 128]
+            bw = [256, 512, 1024, 2048]
+            r = [256, 256, 256, 256]
+            init_num_filter = 96
+            init_filter_size = 7
+            init_padding = 3
+        elif layers == 107:
+            k_r = 200
+            G = 50
+            k_sec = [4, 8, 20, 3]
+            inc_sec = [20, 64, 64, 128]
+            bw = [256, 512, 1024, 2048]
+            r = [256, 256, 256, 256]
+            init_num_filter = 128
+            init_filter_size = 7
+            init_padding = 3
+        elif layers == 131:
+            k_r = 160
+            G = 40
+            k_sec = [4, 8, 28, 3]
+            inc_sec = [16, 32, 32, 128]
+            bw = [256, 512, 1024, 2048]
+            r = [256, 256, 256, 256]
+            init_num_filter = 128
+            init_filter_size = 7
+            init_padding = 3
+        else:
+            raise NotImplementedError
+        net_arg = {
+            'k_r': k_r,
+            'G': G,
+            'k_sec': k_sec,
+            'inc_sec': inc_sec,
+            'bw': bw,
+            'r': r
+        }
+        net_arg['init_num_filter'] = init_num_filter
+        net_arg['init_filter_size'] = init_filter_size
+        net_arg['init_padding'] = init_padding
+
+        return net_arg
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`: expected a bool or a str "
+            "path to the pretrained weights.")
+
+
+def DPN68(pretrained=False, use_ssld=False, **kwargs):
+    model = DPN(layers=68, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DPN68"], use_ssld=use_ssld)
+    return model
+
+
+def DPN92(pretrained=False, use_ssld=False, **kwargs):
+    model = DPN(layers=92, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DPN92"], use_ssld=use_ssld)
+    return model
+
+
+def DPN98(pretrained=False, use_ssld=False, **kwargs):
+    model = DPN(layers=98, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DPN98"], use_ssld=use_ssld)
+    return model
+
+
+def DPN107(pretrained=False, use_ssld=False, **kwargs):
+    model = DPN(layers=107, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DPN107"], use_ssld=use_ssld)
+    return model
+
+
+def DPN131(pretrained=False, use_ssld=False, **kwargs):
+    model = DPN(layers=131, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["DPN131"], use_ssld=use_ssld)
+    return model
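
As elsewhere in the model zoo, the builders return a ready model; a sketch of a forward pass at the native 224x224 resolution:

model = DPN92(pretrained=False)
logits = model(paddle.randn([2, 3, 224, 224]))
print(logits.shape)  # [2, 1000]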

+ 976 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/efficientnet.py

@@ -0,0 +1,976 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+import math
+import collections
+import re
+import copy
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "EfficientNetB0_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
+    "EfficientNetB0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
+    "EfficientNetB1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
+    "EfficientNetB2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
+    "EfficientNetB3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
+    "EfficientNetB4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
+    "EfficientNetB5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
+    "EfficientNetB6":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
+    "EfficientNetB7":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+GlobalParams = collections.namedtuple('GlobalParams', [
+    'batch_norm_momentum',
+    'batch_norm_epsilon',
+    'dropout_rate',
+    'num_classes',
+    'width_coefficient',
+    'depth_coefficient',
+    'depth_divisor',
+    'min_depth',
+    'drop_connect_rate',
+])
+
+BlockArgs = collections.namedtuple('BlockArgs', [
+    'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
+    'expand_ratio', 'id_skip', 'stride', 'se_ratio'
+])
+
+GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields)
+BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields)
+
+
+def efficientnet_params(model_name):
+    """ Map EfficientNet model name to parameter coefficients. """
+    params_dict = {
+        # Coefficients:   width,depth,resolution,dropout
+        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
+        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
+        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
+        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
+        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
+        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
+        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
+        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
+    }
+    return params_dict[model_name]
+
+
+def efficientnet(width_coefficient=None,
+                 depth_coefficient=None,
+                 dropout_rate=0.2,
+                 drop_connect_rate=0.2):
+    """ Get block arguments according to parameter and coefficients. """
+    blocks_args = [
+        'r1_k3_s11_e1_i32_o16_se0.25',
+        'r2_k3_s22_e6_i16_o24_se0.25',
+        'r2_k5_s22_e6_i24_o40_se0.25',
+        'r3_k3_s22_e6_i40_o80_se0.25',
+        'r3_k5_s11_e6_i80_o112_se0.25',
+        'r4_k5_s22_e6_i112_o192_se0.25',
+        'r1_k3_s11_e6_i192_o320_se0.25',
+    ]
+    blocks_args = BlockDecoder.decode(blocks_args)
+
+    global_params = GlobalParams(
+        batch_norm_momentum=0.99,
+        batch_norm_epsilon=1e-3,
+        dropout_rate=dropout_rate,
+        drop_connect_rate=drop_connect_rate,
+        num_classes=1000,
+        width_coefficient=width_coefficient,
+        depth_coefficient=depth_coefficient,
+        depth_divisor=8,
+        min_depth=None)
+
+    return blocks_args, global_params
+
+
+def get_model_params(model_name, override_params):
+    """ Get the block args and global params for a given model """
+    if model_name.startswith('efficientnet'):
+        w, d, _, p = efficientnet_params(model_name)
+        blocks_args, global_params = efficientnet(
+            width_coefficient=w, depth_coefficient=d, dropout_rate=p)
+    else:
+        raise NotImplementedError('model name is not pre-defined: %s' %
+                                  model_name)
+    if override_params:
+        global_params = global_params._replace(**override_params)
+    return blocks_args, global_params
+
+
+def round_filters(filters, global_params):
+    """ Calculate and round number of filters based on depth multiplier. """
+    multiplier = global_params.width_coefficient
+    if not multiplier:
+        return filters
+    divisor = global_params.depth_divisor
+    min_depth = global_params.min_depth
+    filters *= multiplier
+    min_depth = min_depth or divisor
+    new_filters = max(min_depth,
+                      int(filters + divisor / 2) // divisor * divisor)
+    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
+        new_filters += divisor
+    return int(new_filters)
+
+
+def round_repeats(repeats, global_params):
+    """ Round number of filters based on depth multiplier. """
+    multiplier = global_params.depth_coefficient
+    if not multiplier:
+        return repeats
+    return int(math.ceil(multiplier * repeats))
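
To make the rounding rules concrete, a small sketch exercising both helpers with the B7 coefficients; the GP namedtuple is a hypothetical stand-in for GlobalParams:

from collections import namedtuple

GP = namedtuple('GP', ['width_coefficient', 'depth_coefficient',
                       'depth_divisor', 'min_depth'])
b7 = GP(width_coefficient=2.0, depth_coefficient=3.1,
        depth_divisor=8, min_depth=None)
print(round_filters(32, b7))  # 64: 32 * 2.0 is already divisible by 8
print(round_filters(40, b7))  # 80
print(round_repeats(3, b7))   # 10: ceil(3 * 3.1)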
+
+
+class BlockDecoder(object):
+    """
+    Block Decoder, straight from the official TensorFlow repository.
+    """
+
+    @staticmethod
+    def _decode_block_string(block_string):
+        """ Gets a block through a string notation of arguments. """
+        assert isinstance(block_string, str)
+
+        ops = block_string.split('_')
+        options = {}
+        for op in ops:
+            splits = re.split(r'(\d.*)', op)
+            if len(splits) >= 2:
+                key, value = splits[:2]
+                options[key] = value
+
+        # Check stride
+        cond_1 = ('s' in options and len(options['s']) == 1)
+        cond_2 = ((len(options['s']) == 2) and
+                  (options['s'][0] == options['s'][1]))
+        assert (cond_1 or cond_2)
+
+        return BlockArgs(
+            kernel_size=int(options['k']),
+            num_repeat=int(options['r']),
+            input_filters=int(options['i']),
+            output_filters=int(options['o']),
+            expand_ratio=int(options['e']),
+            id_skip=('noskip' not in block_string),
+            se_ratio=float(options['se']) if 'se' in options else None,
+            stride=[int(options['s'][0])])
+
+    @staticmethod
+    def _encode_block_string(block):
+        """Encodes a block to a string."""
+        # `stride` is stored as a single-element list by
+        # `_decode_block_string`, so the same value is written twice.
+        args = [
+            'r%d' % block.num_repeat, 'k%d' % block.kernel_size, 's%d%d' %
+            (block.stride[0], block.stride[0]), 'e%s' % block.expand_ratio,
+            'i%d' % block.input_filters, 'o%d' % block.output_filters
+        ]
+        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
+            args.append('se%s' % block.se_ratio)
+        if block.id_skip is False:
+            args.append('noskip')
+        return '_'.join(args)
+
+    @staticmethod
+    def decode(string_list):
+        """
+        Decode a list of string notations to specify blocks in the network.
+
+        string_list: list of strings, each string is a notation of block
+        return
+            list of BlockArgs namedtuples of block args
+        """
+        assert isinstance(string_list, list)
+        blocks_args = []
+        for block_string in string_list:
+            blocks_args.append(BlockDecoder._decode_block_string(block_string))
+        return blocks_args
+
+    @staticmethod
+    def encode(blocks_args):
+        """
+        Encodes a list of BlockArgs to a list of strings.
+
+        :param blocks_args: a list of BlockArgs namedtuples of block args
+        :return: a list of strings, each string is a notation of block
+        """
+        block_strings = []
+        for block in blocks_args:
+            block_strings.append(BlockDecoder._encode_block_string(block))
+        return block_strings
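
For reference, decoding one of the notation strings listed in efficientnet() above gives (a sketch):

args = BlockDecoder._decode_block_string('r2_k5_s22_e6_i24_o40_se0.25')
print(args)
# BlockArgs(kernel_size=5, num_repeat=2, input_filters=24,
#           output_filters=40, expand_ratio=6, id_skip=True,
#           stride=[2], se_ratio=0.25)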
+
+
+def initial_type(name, use_bias=False):
+    param_attr = ParamAttr(name=name + "_weights")
+    if use_bias:
+        bias_attr = ParamAttr(name=name + "_offset")
+    else:
+        bias_attr = False
+    return param_attr, bias_attr
+
+
+def init_batch_norm_layer(name="batch_norm"):
+    param_attr = ParamAttr(name=name + "_scale")
+    bias_attr = ParamAttr(name=name + "_offset")
+    return param_attr, bias_attr
+
+
+def init_fc_layer(name="fc"):
+    param_attr = ParamAttr(name=name + "_weights")
+    bias_attr = ParamAttr(name=name + "_offset")
+    return param_attr, bias_attr
+
+
+def cal_padding(img_size, stride, filter_size, dilation=1):
+    """Calculate padding size."""
+    if img_size % stride == 0:
+        out_size = max(filter_size - stride, 0)
+    else:
+        out_size = max(filter_size - (img_size % stride), 0)
+    return out_size // 2, out_size - out_size // 2
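
The unequal split between the two returned values is what reproduces TensorFlow-style asymmetric "SAME" padding. A sketch of the arithmetic:

# 224 input, stride 2, 3x3 kernel: the odd leftover pixel is padded on
# the bottom/right side, giving a 112x112 output.
print(cal_padding(224, 2, 3))  # (0, 1)
print(cal_padding(224, 2, 7))  # (2, 3)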
+
+
+inp_shape = {
+    "b0_small": [224, 112, 112, 56, 28, 14, 14, 7],
+    "b0": [224, 112, 112, 56, 28, 14, 14, 7],
+    "b1": [240, 120, 120, 60, 30, 15, 15, 8],
+    "b2": [260, 130, 130, 65, 33, 17, 17, 9],
+    "b3": [300, 150, 150, 75, 38, 19, 19, 10],
+    "b4": [380, 190, 190, 95, 48, 24, 24, 12],
+    "b5": [456, 228, 228, 114, 57, 29, 29, 15],
+    "b6": [528, 264, 264, 132, 66, 33, 33, 17],
+    "b7": [600, 300, 300, 150, 75, 38, 38, 19]
+}
+
+
+def _drop_connect(inputs, prob, is_test):
+    if is_test:
+        output = inputs
+    else:
+        keep_prob = 1.0 - prob
+        inputs_shape = paddle.shape(inputs)
+        random_tensor = keep_prob + paddle.rand(
+            shape=[inputs_shape[0], 1, 1, 1])
+        binary_tensor = paddle.floor(random_tensor)
+        output = paddle.multiply(inputs, binary_tensor) / keep_prob
+    return output
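
Scaling the surviving samples by 1 / keep_prob keeps the expected activation unchanged, which is the usual stochastic-depth trick. A sketch, assuming `import paddle`:

# With prob=0.2 each sample is zeroed with probability 0.2 and the
# survivors are scaled by 1/0.8, so E[output] equals the input.
x = paddle.ones([4, 1, 1, 1])
out = _drop_connect(x, prob=0.2, is_test=False)
# each of the 4 samples is now either 0.0 or 1.25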
+
+
+class Conv2ds(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 groups=None,
+                 name="conv2d",
+                 act=None,
+                 use_bias=False,
+                 padding_type=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(Conv2ds, self).__init__()
+        assert act in [None, "swish", "sigmoid"]
+        self.act = act
+
+        param_attr, bias_attr = initial_type(name=name, use_bias=use_bias)
+
+        def get_padding(filter_size, stride=1, dilation=1):
+            padding = ((stride - 1) + dilation * (filter_size - 1)) // 2
+            return padding
+
+        inps = 1 if model_name is None and cur_stage is None else inp_shape[
+            model_name][cur_stage]
+        self.need_crop = False
+        if padding_type == "SAME":
+            top_padding, bottom_padding = cal_padding(inps, stride,
+                                                      filter_size)
+            left_padding, right_padding = cal_padding(inps, stride,
+                                                      filter_size)
+            height_padding = bottom_padding
+            width_padding = right_padding
+            if top_padding != bottom_padding or left_padding != right_padding:
+                height_padding = top_padding + stride
+                width_padding = left_padding + stride
+                self.need_crop = True
+            padding = [height_padding, width_padding]
+        elif padding_type == "VALID":
+            height_padding = 0
+            width_padding = 0
+            padding = [height_padding, width_padding]
+        elif padding_type == "DYNAMIC":
+            padding = get_padding(filter_size, stride)
+        else:
+            padding = padding_type
+
+        groups = 1 if groups is None else groups
+        self._conv = Conv2D(
+            input_channels,
+            output_channels,
+            filter_size,
+            groups=groups,
+            stride=stride,
+            padding=padding,
+            weight_attr=param_attr,
+            bias_attr=bias_attr)
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        if self.act == "swish":
+            x = F.swish(x)
+        elif self.act == "sigmoid":
+            x = F.sigmoid(x)
+
+        if self.need_crop:
+            x = x[:, :, 1:, 1:]
+        return x
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 filter_size,
+                 output_channels,
+                 stride=1,
+                 num_groups=1,
+                 padding_type="SAME",
+                 conv_act=None,
+                 bn_act="swish",
+                 use_bn=True,
+                 use_bias=False,
+                 name=None,
+                 conv_name=None,
+                 bn_name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2ds(
+            input_channels=input_channels,
+            output_channels=output_channels,
+            filter_size=filter_size,
+            stride=stride,
+            groups=num_groups,
+            act=conv_act,
+            padding_type=padding_type,
+            name=conv_name,
+            use_bias=use_bias,
+            model_name=model_name,
+            cur_stage=cur_stage)
+        self.use_bn = use_bn
+        if use_bn is True:
+            bn_name = name + bn_name
+            param_attr, bias_attr = init_batch_norm_layer(bn_name)
+
+            self._bn = BatchNorm(
+                num_channels=output_channels,
+                act=bn_act,
+                momentum=0.99,
+                epsilon=0.001,
+                moving_mean_name=bn_name + "_mean",
+                moving_variance_name=bn_name + "_variance",
+                param_attr=param_attr,
+                bias_attr=bias_attr)
+
+    def forward(self, inputs):
+        if self.use_bn:
+            x = self._conv(inputs)
+            x = self._bn(x)
+            return x
+        else:
+            return self._conv(inputs)
+
+
+class ExpandConvNorm(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 block_args,
+                 padding_type,
+                 name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(ExpandConvNorm, self).__init__()
+
+        self.oup = block_args.input_filters * block_args.expand_ratio
+        self.expand_ratio = block_args.expand_ratio
+
+        if self.expand_ratio != 1:
+            self._conv = ConvBNLayer(
+                input_channels,
+                1,
+                self.oup,
+                bn_act=None,
+                padding_type=padding_type,
+                name=name,
+                conv_name=name + "_expand_conv",
+                bn_name="_bn0",
+                model_name=model_name,
+                cur_stage=cur_stage)
+
+    def forward(self, inputs):
+        if self.expand_ratio != 1:
+            return self._conv(inputs)
+        else:
+            return inputs
+
+
+class DepthwiseConvNorm(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 block_args,
+                 padding_type,
+                 name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(DepthwiseConvNorm, self).__init__()
+
+        self.k = block_args.kernel_size
+        self.s = block_args.stride
+        if isinstance(self.s, list) or isinstance(self.s, tuple):
+            self.s = self.s[0]
+        oup = block_args.input_filters * block_args.expand_ratio
+
+        self._conv = ConvBNLayer(
+            input_channels,
+            self.k,
+            oup,
+            self.s,
+            num_groups=input_channels,
+            bn_act=None,
+            padding_type=padding_type,
+            name=name,
+            conv_name=name + "_depthwise_conv",
+            bn_name="_bn1",
+            model_name=model_name,
+            cur_stage=cur_stage)
+
+    def forward(self, inputs):
+        return self._conv(inputs)
+
+
+class ProjectConvNorm(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 block_args,
+                 padding_type,
+                 name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(ProjectConvNorm, self).__init__()
+
+        final_oup = block_args.output_filters
+
+        self._conv = ConvBNLayer(
+            input_channels,
+            1,
+            final_oup,
+            bn_act=None,
+            padding_type=padding_type,
+            name=name,
+            conv_name=name + "_project_conv",
+            bn_name="_bn2",
+            model_name=model_name,
+            cur_stage=cur_stage)
+
+    def forward(self, inputs):
+        return self._conv(inputs)
+
+
+class SEBlock(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 num_squeezed_channels,
+                 oup,
+                 padding_type,
+                 name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(SEBlock, self).__init__()
+
+        self._pool = AdaptiveAvgPool2D(1)
+        self._conv1 = Conv2ds(
+            input_channels,
+            num_squeezed_channels,
+            1,
+            use_bias=True,
+            padding_type=padding_type,
+            act="swish",
+            name=name + "_se_reduce")
+
+        self._conv2 = Conv2ds(
+            num_squeezed_channels,
+            oup,
+            1,
+            act="sigmoid",
+            use_bias=True,
+            padding_type=padding_type,
+            name=name + "_se_expand")
+
+    def forward(self, inputs):
+        x = self._pool(inputs)
+        x = self._conv1(x)
+        x = self._conv2(x)
+        out = paddle.multiply(inputs, x)
+        return out
+
+
+class MbConvBlock(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 block_args,
+                 padding_type,
+                 use_se,
+                 name=None,
+                 drop_connect_rate=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(MbConvBlock, self).__init__()
+
+        oup = block_args.input_filters * block_args.expand_ratio
+        self.block_args = block_args
+        self.has_se = use_se and (block_args.se_ratio is not None) and (
+            0 < block_args.se_ratio <= 1)
+        self.id_skip = block_args.id_skip
+        self.expand_ratio = block_args.expand_ratio
+        self.drop_connect_rate = drop_connect_rate
+
+        if self.expand_ratio != 1:
+            self._ecn = ExpandConvNorm(
+                input_channels,
+                block_args,
+                padding_type=padding_type,
+                name=name,
+                model_name=model_name,
+                cur_stage=cur_stage)
+
+        self._dcn = DepthwiseConvNorm(
+            input_channels * block_args.expand_ratio,
+            block_args,
+            padding_type=padding_type,
+            name=name,
+            model_name=model_name,
+            cur_stage=cur_stage)
+
+        if self.has_se:
+            num_squeezed_channels = max(
+                1, int(block_args.input_filters * block_args.se_ratio))
+            self._se = SEBlock(
+                input_channels * block_args.expand_ratio,
+                num_squeezed_channels,
+                oup,
+                padding_type=padding_type,
+                name=name,
+                model_name=model_name,
+                cur_stage=cur_stage)
+
+        self._pcn = ProjectConvNorm(
+            input_channels * block_args.expand_ratio,
+            block_args,
+            padding_type=padding_type,
+            name=name,
+            model_name=model_name,
+            cur_stage=cur_stage)
+
+    def forward(self, inputs):
+        x = inputs
+        if self.expand_ratio != 1:
+            x = self._ecn(x)
+            x = F.swish(x)
+
+        x = self._dcn(x)
+        x = F.swish(x)
+        if self.has_se:
+            x = self._se(x)
+        x = self._pcn(x)
+
+        if self.id_skip and \
+                self.block_args.stride == 1 and \
+                self.block_args.input_filters == self.block_args.output_filters:
+            if self.drop_connect_rate:
+                x = _drop_connect(x, self.drop_connect_rate, not self.training)
+            x = paddle.add(x, inputs)
+        return x
+
+
+class ConvStemNorm(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 padding_type,
+                 _global_params,
+                 name=None,
+                 model_name=None,
+                 cur_stage=None):
+        super(ConvStemNorm, self).__init__()
+
+        output_channels = round_filters(32, _global_params)
+        self._conv = ConvBNLayer(
+            input_channels,
+            filter_size=3,
+            output_channels=output_channels,
+            stride=2,
+            bn_act=None,
+            padding_type=padding_type,
+            name="",
+            conv_name="_conv_stem",
+            bn_name="_bn0",
+            model_name=model_name,
+            cur_stage=cur_stage)
+
+    def forward(self, inputs):
+        return self._conv(inputs)
+
+
+class ExtractFeatures(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 _block_args,
+                 _global_params,
+                 padding_type,
+                 use_se,
+                 model_name=None):
+        super(ExtractFeatures, self).__init__()
+
+        self._global_params = _global_params
+
+        self._conv_stem = ConvStemNorm(
+            input_channels,
+            padding_type=padding_type,
+            _global_params=_global_params,
+            model_name=model_name,
+            cur_stage=0)
+
+        self.block_args_copy = copy.deepcopy(_block_args)
+        idx = 0
+        block_size = 0
+        for block_arg in self.block_args_copy:
+            block_arg = block_arg._replace(
+                input_filters=round_filters(block_arg.input_filters,
+                                            _global_params),
+                output_filters=round_filters(block_arg.output_filters,
+                                             _global_params),
+                num_repeat=round_repeats(block_arg.num_repeat, _global_params))
+            block_size += 1
+            for _ in range(block_arg.num_repeat - 1):
+                block_size += 1
+
+        self.conv_seq = []
+        cur_stage = 1
+        for block_args in _block_args:
+            block_args = block_args._replace(
+                input_filters=round_filters(block_args.input_filters,
+                                            _global_params),
+                output_filters=round_filters(block_args.output_filters,
+                                             _global_params),
+                num_repeat=round_repeats(block_args.num_repeat,
+                                         _global_params))
+
+            drop_connect_rate = self._global_params.drop_connect_rate
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / block_size
+
+            _mc_block = self.add_sublayer(
+                "_blocks." + str(idx) + ".",
+                MbConvBlock(
+                    block_args.input_filters,
+                    block_args=block_args,
+                    padding_type=padding_type,
+                    use_se=use_se,
+                    name="_blocks." + str(idx) + ".",
+                    drop_connect_rate=drop_connect_rate,
+                    model_name=model_name,
+                    cur_stage=cur_stage))
+            self.conv_seq.append(_mc_block)
+            idx += 1
+            if block_args.num_repeat > 1:
+                block_args = block_args._replace(
+                    input_filters=block_args.output_filters, stride=1)
+            for _ in range(block_args.num_repeat - 1):
+                drop_connect_rate = self._global_params.drop_connect_rate
+                if drop_connect_rate:
+                    drop_connect_rate *= float(idx) / block_size
+                _mc_block = self.add_sublayer(
+                    "block." + str(idx) + ".",
+                    MbConvBlock(
+                        block_args.input_filters,
+                        block_args,
+                        padding_type=padding_type,
+                        use_se=use_se,
+                        name="_blocks." + str(idx) + ".",
+                        drop_connect_rate=drop_connect_rate,
+                        model_name=model_name,
+                        cur_stage=cur_stage))
+                self.conv_seq.append(_mc_block)
+                idx += 1
+            cur_stage += 1
+
+    def forward(self, inputs):
+        x = self._conv_stem(inputs)
+        x = F.swish(x)
+        for _mc_block in self.conv_seq:
+            x = _mc_block(x)
+        return x
+
+
+class EfficientNet(nn.Layer):
+    def __init__(self,
+                 name="b0",
+                 padding_type="SAME",
+                 override_params=None,
+                 use_se=True,
+                 class_num=1000):
+        super(EfficientNet, self).__init__()
+
+        model_name = 'efficientnet-' + name
+        self.name = name
+        self._block_args, self._global_params = get_model_params(
+            model_name, override_params)
+        self.padding_type = padding_type
+        self.use_se = use_se
+
+        self._ef = ExtractFeatures(
+            3,
+            self._block_args,
+            self._global_params,
+            self.padding_type,
+            self.use_se,
+            model_name=self.name)
+
+        output_channels = round_filters(1280, self._global_params)
+        if name == "b0_small" or name == "b0" or name == "b1":
+            oup = 320
+        elif name == "b2":
+            oup = 352
+        elif name == "b3":
+            oup = 384
+        elif name == "b4":
+            oup = 448
+        elif name == "b5":
+            oup = 512
+        elif name == "b6":
+            oup = 576
+        elif name == "b7":
+            oup = 640
+        self._conv = ConvBNLayer(
+            oup,
+            1,
+            output_channels,
+            bn_act="swish",
+            padding_type=self.padding_type,
+            name="",
+            conv_name="_conv_head",
+            bn_name="_bn1",
+            model_name=self.name,
+            cur_stage=7)
+        self._pool = AdaptiveAvgPool2D(1)
+
+        if self._global_params.dropout_rate:
+            self._drop = Dropout(
+                p=self._global_params.dropout_rate, mode="upscale_in_train")
+
+        param_attr, bias_attr = init_fc_layer("_fc")
+        self._fc = Linear(
+            output_channels,
+            class_num,
+            weight_attr=param_attr,
+            bias_attr=bias_attr)
+
+    def forward(self, inputs):
+        x = self._ef(inputs)
+        x = self._conv(x)
+        x = self._pool(x)
+        if self._global_params.dropout_rate:
+            x = self._drop(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self._fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`: expected a bool or a str "
+            "path to the pretrained weights.")
+
+
+def EfficientNetB0_small(padding_type='DYNAMIC',
+                         override_params=None,
+                         use_se=False,
+                         pretrained=False,
+                         use_ssld=False,
+                         **kwargs):
+    model = EfficientNet(
+        name='b0',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB0_small"],
+        use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB0(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b0',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB0"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB1(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b1',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB1"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB2(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b2',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB2"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB3(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b3',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB3"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB4(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b4',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB4"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB5(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b5',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["EfficientNetB5"], use_ssld=use_ssld)
+    return model
+
+
+def EfficientNetB6(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b6',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"])
+    return model
+
+
+def EfficientNetB7(padding_type='SAME',
+                   override_params=None,
+                   use_se=True,
+                   pretrained=False,
+                   use_ssld=False,
+                   **kwargs):
+    model = EfficientNet(
+        name='b7',
+        padding_type=padding_type,
+        override_params=override_params,
+        use_se=use_se,
+        **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
+    return model
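All nine factories above share one pattern: build an `EfficientNet` with the per-variant `name`, then optionally load weights. A minimal usage sketch, assuming the factory is importable from this module (the import path below is hypothetical; adjust it to your checkout):

import paddle

# Hypothetical import path for illustration only.
from paddlers.models.ppcls.arch.backbone.model_zoo.efficientnet import EfficientNetB0

model = EfficientNetB0(pretrained=False)  # pretrained=True downloads MODEL_URLS weights
model.eval()

x = paddle.randn([1, 3, 224, 224])        # NCHW float input
with paddle.no_grad():
    logits = model(x)
print(logits.shape)                       # [1, 1000] with the default head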

+ 363 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/ghostnet.py

@@ -0,0 +1,363 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
+
+import math
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Uniform, KaimingNormal
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "GhostNet_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
+    "GhostNet_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
+    "GhostNet_x1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+        bn_name = name + "_bn"
+
+        self._batch_norm = BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale", regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset", regularizer=L2Decay(0.0)),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class SEBlock(nn.Layer):
+    def __init__(self, num_channels, reduction_ratio=4, name=None):
+        super(SEBlock, self).__init__()
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+        self._num_channels = num_channels
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        med_ch = num_channels // reduction_ratio
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
+            bias_attr=ParamAttr(name=name + "_1_offset"))
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_channels,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
+            bias_attr=ParamAttr(name=name + "_2_offset"))
+
+    def forward(self, inputs):
+        pool = self.pool2d_gap(inputs)
+        pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = F.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = paddle.clip(x=excitation, min=0, max=1)
+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = paddle.multiply(inputs, excitation)
+        return out
+
+
+class GhostModule(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 output_channels,
+                 kernel_size=1,
+                 ratio=2,
+                 dw_size=3,
+                 stride=1,
+                 relu=True,
+                 name=None):
+        super(GhostModule, self).__init__()
+        init_channels = int(math.ceil(output_channels / ratio))
+        new_channels = int(init_channels * (ratio - 1))
+        self.primary_conv = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=init_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            groups=1,
+            act="relu" if relu else None,
+            name=name + "_primary_conv")
+        self.cheap_operation = ConvBNLayer(
+            in_channels=init_channels,
+            out_channels=new_channels,
+            kernel_size=dw_size,
+            stride=1,
+            groups=init_channels,
+            act="relu" if relu else None,
+            name=name + "_cheap_operation")
+
+    def forward(self, inputs):
+        x = self.primary_conv(inputs)
+        y = self.cheap_operation(x)
+        out = paddle.concat([x, y], axis=1)
+        return out
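The channel bookkeeping in `GhostModule` is easy to misread: with `ratio=2` the primary conv produces `ceil(out/2)` channels and the cheap depthwise conv produces the rest, so the concat recovers `output_channels` exactly when `ratio` divides it, and slightly overshoots otherwise. A quick sketch of the arithmetic, plain Python only:

import math

def ghost_channels(output_channels, ratio=2):
    # Mirrors GhostModule.__init__: primary conv + cheap depthwise conv.
    init_channels = int(math.ceil(output_channels / ratio))
    new_channels = int(init_channels * (ratio - 1))
    return init_channels, new_channels, init_channels + new_channels

print(ghost_channels(64))  # (32, 32, 64): concat matches output_channels
print(ghost_channels(63))  # (32, 32, 64): overshoots by one when ratio does not divide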
+
+
+class GhostBottleneck(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 hidden_dim,
+                 output_channels,
+                 kernel_size,
+                 stride,
+                 use_se,
+                 name=None):
+        super(GhostBottleneck, self).__init__()
+        self._stride = stride
+        self._use_se = use_se
+        self._num_channels = in_channels
+        self._output_channels = output_channels
+        self.ghost_module_1 = GhostModule(
+            in_channels=in_channels,
+            output_channels=hidden_dim,
+            kernel_size=1,
+            stride=1,
+            relu=True,
+            name=name + "_ghost_module_1")
+        if stride == 2:
+            self.depthwise_conv = ConvBNLayer(
+                in_channels=hidden_dim,
+                out_channels=hidden_dim,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=hidden_dim,
+                act=None,
+                name=name +
+                "_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+        if use_se:
+            self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
+        self.ghost_module_2 = GhostModule(
+            in_channels=hidden_dim,
+            output_channels=output_channels,
+            kernel_size=1,
+            relu=False,
+            name=name + "_ghost_module_2")
+        if stride != 1 or in_channels != output_channels:
+            self.shortcut_depthwise = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=in_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                groups=in_channels,
+                act=None,
+                name=name +
+                "_shortcut_depthwise_depthwise"  # looks strange due to an old typo, will be fixed later.
+            )
+            self.shortcut_conv = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=output_channels,
+                kernel_size=1,
+                stride=1,
+                groups=1,
+                act=None,
+                name=name + "_shortcut_conv")
+
+    def forward(self, inputs):
+        x = self.ghost_module_1(inputs)
+        if self._stride == 2:
+            x = self.depthwise_conv(x)
+        if self._use_se:
+            x = self.se_block(x)
+        x = self.ghost_module_2(x)
+        if self._stride == 1 and self._num_channels == self._output_channels:
+            shortcut = inputs
+        else:
+            shortcut = self.shortcut_depthwise(inputs)
+            shortcut = self.shortcut_conv(shortcut)
+        return paddle.add(x=x, y=shortcut)
+
+
+class GhostNet(nn.Layer):
+    def __init__(self, scale, class_num=1000):
+        super(GhostNet, self).__init__()
+        self.cfgs = [
+            # k, t, c, SE, s
+            [3, 16, 16, 0, 1],
+            [3, 48, 24, 0, 2],
+            [3, 72, 24, 0, 1],
+            [5, 72, 40, 1, 2],
+            [5, 120, 40, 1, 1],
+            [3, 240, 80, 0, 2],
+            [3, 200, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 184, 80, 0, 1],
+            [3, 480, 112, 1, 1],
+            [3, 672, 112, 1, 1],
+            [5, 672, 160, 1, 2],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1],
+            [5, 960, 160, 0, 1],
+            [5, 960, 160, 1, 1]
+        ]
+        self.scale = scale
+        output_channels = int(self._make_divisible(16 * self.scale, 4))
+        self.conv1 = ConvBNLayer(
+            in_channels=3,
+            out_channels=output_channels,
+            kernel_size=3,
+            stride=2,
+            groups=1,
+            act="relu",
+            name="conv1")
+        # build inverted residual blocks
+        idx = 0
+        self.ghost_bottleneck_list = []
+        for k, exp_size, c, use_se, s in self.cfgs:
+            in_channels = output_channels
+            output_channels = int(self._make_divisible(c * self.scale, 4))
+            hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
+            ghost_bottleneck = self.add_sublayer(
+                name="_ghostbottleneck_" + str(idx),
+                sublayer=GhostBottleneck(
+                    in_channels=in_channels,
+                    hidden_dim=hidden_dim,
+                    output_channels=output_channels,
+                    kernel_size=k,
+                    stride=s,
+                    use_se=use_se,
+                    name="_ghostbottleneck_" + str(idx)))
+            self.ghost_bottleneck_list.append(ghost_bottleneck)
+            idx += 1
+        # build last several layers
+        in_channels = output_channels
+        output_channels = int(self._make_divisible(exp_size * self.scale, 4))
+        self.conv_last = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=output_channels,
+            kernel_size=1,
+            stride=1,
+            groups=1,
+            act="relu",
+            name="conv_last")
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+        in_channels = output_channels
+        self._fc0_output_channels = 1280
+        self.fc_0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=self._fc0_output_channels,
+            kernel_size=1,
+            stride=1,
+            act="relu",
+            name="fc_0")
+        self.dropout = nn.Dropout(p=0.2)
+        stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0)
+        self.fc_1 = Linear(
+            self._fc0_output_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                name="fc_1_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(name="fc_1_offset"))
+
+    def forward(self, inputs):
+        x = self.conv1(inputs)
+        for ghost_bottleneck in self.ghost_bottleneck_list:
+            x = ghost_bottleneck(x)
+        x = self.conv_last(x)
+        x = self.pool2d_gap(x)
+        x = self.fc_0(x)
+        x = self.dropout(x)
+        x = paddle.reshape(x, shape=[-1, self._fc0_output_channels])
+        x = self.fc_1(x)
+        return x
+
+    def _make_divisible(self, v, divisor, min_value=None):
+        """
+        This function is taken from the original tf repo.
+        It ensures that all layers have a channel number that is divisible by 8
+        It can be seen here:
+        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+        """
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        # Make sure that round down does not go down by more than 10%.
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
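Two details of `_make_divisible` are worth spelling out: the `+ divisor / 2` term rounds to the nearest multiple, and the final check bumps the result up one step if rounding down would lose more than 10% of `v`. A standalone sketch with the `divisor=4` used here:

def make_divisible(v, divisor=4, min_value=None):
    # Standalone copy of GhostNet._make_divisible for experimenting.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

print(make_divisible(16 * 0.5))  # 8: already a multiple of 4
print(make_divisible(16 * 1.3))  # 20: 20.8 rounds to the nearest multiple
print(make_divisible(3.5))       # 4: clamped up to min_value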
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "The type of `pretrained` is not supported. Please pass a "
+            "`bool` or a `str` path to the pretrained weights.")
+
+
+def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    model = GhostNet(scale=0.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
+    return model
+
+
+def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    model = GhostNet(scale=1.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
+    return model
+
+
+def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
+    model = GhostNet(scale=1.3, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
+    return model
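`_load_pretrained` overloads its first argument: `False` skips loading, `True` downloads the checkpoint behind `MODEL_URLS`, and a string is handed to `load_dygraph_pretrain` as a local path. A usage sketch of the factories above (run from this module; the local path is hypothetical):

import paddle

model = GhostNet_x1_0(pretrained=False, class_num=10)  # random init, 10-way head
# model = GhostNet_x1_0(pretrained=True)               # official ImageNet weights
# model = GhostNet_x1_0(pretrained="./my_ghostnet")    # local .pdparams checkpoint

model.eval()
x = paddle.randn([2, 3, 224, 224])
print(model(x).shape)  # [2, 10]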

+ 229 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/googlenet.py

@@ -0,0 +1,229 @@
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "GoogLeNet":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def xavier(channels, filter_size, name):
+    stdv = (3.0 / (filter_size**2 * channels))**0.5
+    param_attr = ParamAttr(
+        initializer=Uniform(-stdv, stdv), name=name + "_weights")
+    return param_attr
+
+
+class ConvLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        return y
+
+
+class Inception(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter1,
+                 filter3R,
+                 filter3,
+                 filter5R,
+                 filter5,
+                 proj,
+                 name=None):
+        super(Inception, self).__init__()
+
+        self._conv1 = ConvLayer(
+            input_channels, filter1, 1, name="inception_" + name + "_1x1")
+        self._conv3r = ConvLayer(
+            input_channels,
+            filter3R,
+            1,
+            name="inception_" + name + "_3x3_reduce")
+        self._conv3 = ConvLayer(
+            filter3R, filter3, 3, name="inception_" + name + "_3x3")
+        self._conv5r = ConvLayer(
+            input_channels,
+            filter5R,
+            1,
+            name="inception_" + name + "_5x5_reduce")
+        self._conv5 = ConvLayer(
+            filter5R, filter5, 5, name="inception_" + name + "_5x5")
+        self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1)
+
+        self._convprj = ConvLayer(
+            input_channels, proj, 1, name="inception_" + name + "_3x3_proj")
+
+    def forward(self, inputs):
+        conv1 = self._conv1(inputs)
+
+        conv3r = self._conv3r(inputs)
+        conv3 = self._conv3(conv3r)
+
+        conv5r = self._conv5r(inputs)
+        conv5 = self._conv5(conv5r)
+
+        pool = self._pool(inputs)
+        convprj = self._convprj(pool)
+
+        cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1)
+        cat = F.relu(cat)
+        return cat
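Because the four branches are concatenated along the channel axis, each `Inception` block emits `filter1 + filter3 + filter5 + proj` channels; the `output_channels` argument is never actually read. The stage wiring below relies on this arithmetic:

def inception_out_channels(filter1, filter3, filter5, proj):
    # Channel count after the concat in Inception.forward.
    return filter1 + filter3 + filter5 + proj

assert inception_out_channels(64, 128, 32, 32) == 256   # ince3a feeds ince3b(256, ...)
assert inception_out_channels(128, 192, 96, 64) == 480  # ince3b feeds ince4a(480, ...)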
+
+
+class GoogLeNetDY(nn.Layer):
+    def __init__(self, class_num=1000):
+        super(GoogLeNetDY, self).__init__()
+        self._conv = ConvLayer(3, 64, 7, 2, name="conv1")
+        self._pool = MaxPool2D(kernel_size=3, stride=2)
+        self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1")
+        self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3")
+
+        self._ince3a = Inception(
+            192, 192, 64, 96, 128, 16, 32, 32, name="ince3a")
+        self._ince3b = Inception(
+            256, 256, 128, 128, 192, 32, 96, 64, name="ince3b")
+
+        self._ince4a = Inception(
+            480, 480, 192, 96, 208, 16, 48, 64, name="ince4a")
+        self._ince4b = Inception(
+            512, 512, 160, 112, 224, 24, 64, 64, name="ince4b")
+        self._ince4c = Inception(
+            512, 512, 128, 128, 256, 24, 64, 64, name="ince4c")
+        self._ince4d = Inception(
+            512, 512, 112, 144, 288, 32, 64, 64, name="ince4d")
+        self._ince4e = Inception(
+            528, 528, 256, 160, 320, 32, 128, 128, name="ince4e")
+
+        self._ince5a = Inception(
+            832, 832, 256, 160, 320, 32, 128, 128, name="ince5a")
+        self._ince5b = Inception(
+            832, 832, 384, 192, 384, 48, 128, 128, name="ince5b")
+
+        self._pool_5 = AdaptiveAvgPool2D(1)
+
+        self._drop = Dropout(p=0.4, mode="downscale_in_infer")
+        self._fc_out = Linear(
+            1024,
+            class_num,
+            weight_attr=xavier(1024, 1, "out"),
+            bias_attr=ParamAttr(name="out_offset"))
+        self._pool_o1 = AvgPool2D(kernel_size=5, stride=3)
+        self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1")
+        self._fc_o1 = Linear(
+            1152,
+            1024,
+            weight_attr=xavier(2048, 1, "fc_o1"),
+            bias_attr=ParamAttr(name="fc_o1_offset"))
+        self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
+        self._out1 = Linear(
+            1024,
+            class_num,
+            weight_attr=xavier(1024, 1, "out1"),
+            bias_attr=ParamAttr(name="out1_offset"))
+        self._pool_o2 = AvgPool2D(kernel_size=5, stride=3)
+        self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2")
+        self._fc_o2 = Linear(
+            1152,
+            1024,
+            weight_attr=xavier(2048, 1, "fc_o2"),
+            bias_attr=ParamAttr(name="fc_o2_offset"))
+        self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
+        self._out2 = Linear(
+            1024,
+            class_num,
+            weight_attr=xavier(1024, 1, "out2"),
+            bias_attr=ParamAttr(name="out2_offset"))
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = self._pool(x)
+        x = self._conv_1(x)
+        x = self._conv_2(x)
+        x = self._pool(x)
+
+        x = self._ince3a(x)
+        x = self._ince3b(x)
+        x = self._pool(x)
+
+        ince4a = self._ince4a(x)
+        x = self._ince4b(ince4a)
+        x = self._ince4c(x)
+        ince4d = self._ince4d(x)
+        x = self._ince4e(ince4d)
+        x = self._pool(x)
+
+        x = self._ince5a(x)
+        ince5b = self._ince5b(x)
+
+        x = self._pool_5(ince5b)
+        x = self._drop(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        out = self._fc_out(x)
+
+        x = self._pool_o1(ince4a)
+        x = self._conv_o1(x)
+        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+        x = self._fc_o1(x)
+        x = F.relu(x)
+        x = self._drop_o1(x)
+        out1 = self._out1(x)
+
+        x = self._pool_o2(ince4d)
+        x = self._conv_o2(x)
+        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+        x = self._fc_o2(x)
+        x = self._drop_o2(x)
+        out2 = self._out2(x)
+        return [out, out1, out2]
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "The type of `pretrained` is not supported. Please pass a "
+            "`bool` or a `str` path to the pretrained weights.")
+
+
+def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
+    model = GoogLeNetDY(**kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
+    return model
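Note that `GoogLeNetDY.forward` returns three logit tensors: the main head plus the two auxiliary classifiers hung off `ince4a` and `ince4d` for deep supervision during training. At inference time only the first output is normally used. A minimal sketch:

import paddle

model = GoogLeNet(pretrained=False)
model.eval()

x = paddle.randn([1, 3, 224, 224])
out, aux1, aux2 = model(x)       # main head + two auxiliary heads
pred = out.argmax(axis=-1)       # inference keeps the main head only
print(out.shape, aux1.shape, aux2.shape)  # three [1, 1000] tensors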

+ 693 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/gvt.py

@@ -0,0 +1,693 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/Meituan-AutoML/Twins
+
+from functools import partial
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.regularizer import L2Decay
+
+from .vision_transformer import trunc_normal_, normal_, zeros_, ones_, to_2tuple, DropPath, Identity, Mlp
+from .vision_transformer import Block as ViTBlock
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "pcpvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
+    "pcpvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
+    "pcpvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
+    "alt_gvt_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
+    "alt_gvt_base":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
+    "alt_gvt_large":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class GroupAttention(nn.Layer):
+    """LSA: self attention within a group.
+    """
+
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.,
+                 ws=1):
+        super().__init__()
+        if ws == 1:
+            raise Exception("ws {ws} should not be 1")
+        if dim % num_heads != 0:
+            raise Exception(
+                "dim {dim} should be divided by num_heads {num_heads}.")
+
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        self.ws = ws
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        h_group, w_group = H // self.ws, W // self.ws
+        total_groups = h_group * w_group
+        x = x.reshape([B, h_group, self.ws, w_group, self.ws, C]).transpose(
+            [0, 1, 3, 2, 4, 5])
+        qkv = self.qkv(x).reshape([
+            B, total_groups, self.ws**2, 3, self.num_heads, C // self.num_heads
+        ]).transpose([3, 0, 1, 4, 2, 5])
+        q, k, v = qkv[0], qkv[1], qkv[2]
+        attn = paddle.matmul(q, k.transpose([0, 1, 2, 4, 3])) * self.scale
+
+        attn = nn.Softmax(axis=-1)(attn)
+        attn = self.attn_drop(attn)
+        attn = paddle.matmul(attn, v).transpose([0, 1, 3, 2, 4]).reshape(
+            [B, h_group, w_group, self.ws, self.ws, C])
+
+        x = attn.transpose([0, 1, 3, 2, 4, 5]).reshape([B, N, C])
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
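The reshape/transpose sequence in `GroupAttention.forward` implements windowed attention: the `H x W` token grid is split into `(H/ws) x (W/ws)` non-overlapping `ws x ws` windows and attention runs independently inside each one, so the cost grows with `N * ws^2` instead of `N^2`. Note the code assumes `H` and `W` are divisible by `ws`. A shape-only sketch with hypothetical sizes:

import paddle

B, H, W, C, ws = 2, 28, 28, 64, 7
x = paddle.randn([B, H * W, C])

# Partition the grid into (H/ws) * (W/ws) windows of ws*ws tokens each.
windows = (x.reshape([B, H // ws, ws, W // ws, ws, C])
            .transpose([0, 1, 3, 2, 4, 5])
            .reshape([B, (H // ws) * (W // ws), ws * ws, C]))
print(windows.shape)  # [2, 16, 49, 64]: 16 windows of 49 tokens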
+
+
+class Attention(nn.Layer):
+    """GSA: using a key to summarize the information for a group to be efficient.
+    """
+
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.,
+                 sr_ratio=1):
+        super().__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+        self.sr_ratio = sr_ratio
+        if sr_ratio > 1:
+            self.sr = nn.Conv2D(
+                dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+            self.norm = nn.LayerNorm(dim)
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        q = self.q(x).reshape(
+            [B, N, self.num_heads, C // self.num_heads]).transpose(
+                [0, 2, 1, 3])
+
+        if self.sr_ratio > 1:
+            x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
+            tmp_n = H * W // self.sr_ratio**2
+            x_ = self.sr(x_).reshape([B, C, tmp_n]).transpose([0, 2, 1])
+            x_ = self.norm(x_)
+            kv = self.kv(x_).reshape(
+                [B, tmp_n, 2, self.num_heads, C // self.num_heads]).transpose(
+                    [2, 0, 3, 1, 4])
+        else:
+            kv = self.kv(x).reshape(
+                [B, N, 2, self.num_heads, C // self.num_heads]).transpose(
+                    [2, 0, 3, 1, 4])
+        k, v = kv[0], kv[1]
+
+        attn = paddle.matmul(q, k.transpose([0, 1, 3, 2])) * self.scale
+        attn = nn.Softmax(axis=-1)(attn)
+        attn = self.attn_drop(attn)
+
+        x = paddle.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, C])
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
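In `Attention` (GSA), the strided `sr` conv shrinks the key/value grid by `sr_ratio` per side before attention, so the score matrix is `N x (N / sr_ratio^2)` rather than `N x N`. With the stage-1 settings used by the factories below (a 56x56 grid, `sr_ratio=8`) that is a 64x reduction. Back-of-the-envelope:

def attn_matrix_sizes(H, W, sr_ratio):
    # Queries keep all N tokens; keys/values shrink by sr_ratio per side.
    n_q = H * W
    n_kv = (H // sr_ratio) * (W // sr_ratio)
    return n_q, n_kv

n_q, n_kv = attn_matrix_sizes(56, 56, sr_ratio=8)
print(n_q, n_kv)                    # 3136 49
print((n_q * n_q) // (n_q * n_kv))  # 64x fewer attention scores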
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm,
+                 sr_ratio=1):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+            sr_ratio=sr_ratio)
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop)
+
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+
+
+class SBlock(ViTBlock):
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm,
+                 sr_ratio=1):
+        super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
+                         attn_drop, drop_path, act_layer, norm_layer)
+
+    def forward(self, x, H, W):
+        return super().forward(x)
+
+
+class GroupBlock(ViTBlock):
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm,
+                 sr_ratio=1,
+                 ws=1):
+        super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
+                         attn_drop, drop_path, act_layer, norm_layer)
+        del self.attn
+        if ws == 1:
+            self.attn = Attention(dim, num_heads, qkv_bias, qk_scale,
+                                  attn_drop, drop, sr_ratio)
+        else:
+            self.attn = GroupAttention(dim, num_heads, qkv_bias, qk_scale,
+                                       attn_drop, drop, ws)
+
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+
+
+class PatchEmbed(nn.Layer):
+    """ Image to Patch Embedding.
+    """
+
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+        super().__init__()
+        if img_size % patch_size != 0:
+            raise Exception(
+                f"img_size {img_size} should be divided by patch_size {patch_size}."
+            )
+
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.H, self.W = img_size[0] // patch_size[0], img_size[
+            1] // patch_size[1]
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2D(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+        self.norm = nn.LayerNorm(embed_dim)
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        x = self.proj(x).flatten(2).transpose([0, 2, 1])
+        x = self.norm(x)
+        H, W = H // self.patch_size[0], W // self.patch_size[1]
+        return x, (H, W)
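`PatchEmbed` turns each `patch_size x patch_size` tile into one token via the strided conv, so a 224x224 input with `patch_size=4` (the setting every factory below uses) gives a 56x56 grid of 3136 tokens, and each later stage halves the grid again with `patch_size=2`. The bookkeeping, for reference:

def token_grids(img_size, patch_sizes):
    # Stage 0 uses the full patch size; later stages downsample by 2.
    side, grids = img_size, []
    for p in patch_sizes:
        side //= p
        grids.append((side, side, side * side))
    return grids

print(token_grids(224, [4, 2, 2, 2]))
# [(56, 56, 3136), (28, 28, 784), (14, 14, 196), (7, 7, 49)]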
+
+
+# Borrowed from PVT: https://github.com/whai362/PVT.git
+class PyramidVisionTransformer(nn.Layer):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8],
+                 mlp_ratios=[4, 4, 4, 4],
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3],
+                 sr_ratios=[8, 4, 2, 1],
+                 block_cls=Block):
+        super().__init__()
+        self.class_num = class_num
+        self.depths = depths
+
+        # patch_embed
+        self.patch_embeds = nn.LayerList()
+        self.pos_embeds = nn.ParameterList()
+        self.pos_drops = nn.LayerList()
+        self.blocks = nn.LayerList()
+
+        for i in range(len(depths)):
+            if i == 0:
+                self.patch_embeds.append(
+                    PatchEmbed(img_size, patch_size, in_chans, embed_dims[i]))
+            else:
+                self.patch_embeds.append(
+                    PatchEmbed(img_size // patch_size // 2**(i - 1), 2,
+                               embed_dims[i - 1], embed_dims[i]))
+            patch_num = self.patch_embeds[i].num_patches + 1 if i == len(
+                embed_dims) - 1 else self.patch_embeds[i].num_patches
+            self.pos_embeds.append(
+                self.create_parameter(
+                    shape=[1, patch_num, embed_dims[i]],
+                    default_initializer=zeros_))
+            self.pos_drops.append(nn.Dropout(p=drop_rate))
+
+        dpr = [
+            x.numpy()[0]
+            for x in paddle.linspace(0, drop_path_rate, sum(depths))
+        ]  # stochastic depth decay rule
+
+        cur = 0
+        for k in range(len(depths)):
+            _block = nn.LayerList([
+                block_cls(
+                    dim=embed_dims[k],
+                    num_heads=num_heads[k],
+                    mlp_ratio=mlp_ratios[k],
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[cur + i],
+                    norm_layer=norm_layer,
+                    sr_ratio=sr_ratios[k]) for i in range(depths[k])
+            ])
+            self.blocks.append(_block)
+            cur += depths[k]
+
+        self.norm = norm_layer(embed_dims[-1])
+
+        # cls_token
+        self.cls_token = self.create_parameter(
+            shape=[1, 1, embed_dims[-1]],
+            default_initializer=zeros_,
+            attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
+
+        # classification head
+        self.head = nn.Linear(embed_dims[-1],
+                              class_num) if class_num > 0 else Identity()
+
+        # init weights
+        for pos_emb in self.pos_embeds:
+            trunc_normal_(pos_emb)
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        B = x.shape[0]
+        for i in range(len(self.depths)):
+            x, (H, W) = self.patch_embeds[i](x)
+            if i == len(self.depths) - 1:
+                cls_tokens = self.cls_token.expand([B, -1, -1])
+                x = paddle.concat([cls_tokens, x], axis=1)
+            x = x + self.pos_embeds[i]
+            x = self.pos_drops[i](x)
+            for blk in self.blocks[i]:
+                x = blk(x, H, W)
+            if i < len(self.depths) - 1:
+                x = x.reshape([B, H, W, -1]).transpose([0, 3, 1, 2])
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+
+# PEG (positional encoding generator) from https://arxiv.org/abs/2102.10882
+class PosCNN(nn.Layer):
+    def __init__(self, in_chans, embed_dim=768, s=1):
+        super().__init__()
+        self.proj = nn.Sequential(
+            nn.Conv2D(
+                in_chans,
+                embed_dim,
+                3,
+                s,
+                1,
+                bias_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)),
+                groups=embed_dim,
+                weight_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)), ))
+        self.s = s
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        feat_token = x
+        cnn_feat = feat_token.transpose([0, 2, 1]).reshape([B, C, H, W])
+        if self.s == 1:
+            x = self.proj(cnn_feat) + cnn_feat
+        else:
+            x = self.proj(cnn_feat)
+        x = x.flatten(2).transpose([0, 2, 1])
+        return x
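`PosCNN` is the PEG itself: a zero-padded 3x3 depthwise conv over the token grid whose output is added back to the features (when `s == 1`), so position is inferred from padding and local structure instead of a fixed-size learned table. This is what lets `CPVTV2` below delete `pos_embeds` outright and call `pos_block` after the first block of each stage. A shape sketch with hypothetical sizes:

import paddle
import paddle.nn as nn

B, H, W, C = 2, 14, 14, 64
x = paddle.randn([B, H * W, C])           # token sequence

peg = nn.Conv2D(C, C, 3, 1, 1, groups=C)  # depthwise 3x3, as in PosCNN.proj
feat = x.transpose([0, 2, 1]).reshape([B, C, H, W])
out = (peg(feat) + feat).flatten(2).transpose([0, 2, 1])
print(out.shape)                          # [2, 196, 64]: same shape, now position-aware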
+
+
+class CPVTV2(PyramidVisionTransformer):
+    """
+    Use useful results from CPVT. PEG and GAP.
+    Therefore, cls token is no longer required.
+    PEG is used to encode the absolute position on the fly, which greatly affects the performance when input resolution
+    changes during the training (such as segmentation, detection)
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=4,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8],
+                 mlp_ratios=[4, 4, 4, 4],
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3],
+                 sr_ratios=[8, 4, 2, 1],
+                 block_cls=Block):
+        super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
+                         num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
+                         attn_drop_rate, drop_path_rate, norm_layer, depths,
+                         sr_ratios, block_cls)
+        del self.pos_embeds
+        del self.cls_token
+        self.pos_block = nn.LayerList(
+            [PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims])
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        import math
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+        elif isinstance(m, nn.Conv2D):
+            fan_out = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
+            fan_out //= m._groups
+            normal_(0, math.sqrt(2.0 / fan_out))(m.weight)
+            if m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.BatchNorm2D):
+            ones_(m.weight)
+            zeros_(m.bias)
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        for i in range(len(self.depths)):
+            x, (H, W) = self.patch_embeds[i](x)
+            x = self.pos_drops[i](x)
+
+            for j, blk in enumerate(self.blocks[i]):
+                x = blk(x, H, W)
+                if j == 0:
+                    x = self.pos_block[i](x, H, W)  # PEG here
+
+            if i < len(self.depths) - 1:
+                x = x.reshape([B, H, W, x.shape[-1]]).transpose([0, 3, 1, 2])
+
+        x = self.norm(x)
+        return x.mean(axis=1)  # GAP here
+
+
+class PCPVT(CPVTV2):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=4,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dims=[64, 128, 256],
+                 num_heads=[1, 2, 4],
+                 mlp_ratios=[4, 4, 4],
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 depths=[4, 4, 4],
+                 sr_ratios=[4, 2, 1],
+                 block_cls=SBlock):
+        super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
+                         num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
+                         attn_drop_rate, drop_path_rate, norm_layer, depths,
+                         sr_ratios, block_cls)
+
+
+class ALTGVT(PCPVT):
+    """
+    alias Twins-SVT
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=4,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dims=[64, 128, 256],
+                 num_heads=[1, 2, 4],
+                 mlp_ratios=[4, 4, 4],
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 depths=[4, 4, 4],
+                 sr_ratios=[4, 2, 1],
+                 block_cls=GroupBlock,
+                 wss=[7, 7, 7]):
+        super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
+                         num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
+                         attn_drop_rate, drop_path_rate, norm_layer, depths,
+                         sr_ratios, block_cls)
+        del self.blocks
+        self.wss = wss
+        # transformer encoder
+        dpr = [
+            x.numpy()[0]
+            for x in paddle.linspace(0, drop_path_rate, sum(depths))
+        ]  # stochastic depth decay rule
+        cur = 0
+        self.blocks = nn.LayerList()
+        for k in range(len(depths)):
+            _block = nn.LayerList([
+                block_cls(
+                    dim=embed_dims[k],
+                    num_heads=num_heads[k],
+                    mlp_ratio=mlp_ratios[k],
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[cur + i],
+                    norm_layer=norm_layer,
+                    sr_ratio=sr_ratios[k],
+                    ws=1 if i % 2 == 1 else wss[k]) for i in range(depths[k])
+            ])
+            self.blocks.append(_block)
+            cur += depths[k]
+        self.apply(self._init_weights)
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "The type of `pretrained` is not supported. Please pass a "
+            "`bool` or a `str` path to the pretrained weights.")
+
+
+def pcpvt_small(pretrained=False, use_ssld=False, **kwargs):
+    model = CPVTV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 4, 6, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
+    return model
+
+
+def pcpvt_base(pretrained=False, use_ssld=False, **kwargs):
+    model = CPVTV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 4, 18, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
+    return model
+
+
+def pcpvt_large(pretrained=False, use_ssld=False, **kwargs):
+    model = CPVTV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 8, 27, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
+    return model
+
+
+def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs):
+    model = ALTGVT(
+        patch_size=4,
+        embed_dims=[64, 128, 256, 512],
+        num_heads=[2, 4, 8, 16],
+        mlp_ratios=[4, 4, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 10, 4],
+        wss=[7, 7, 7, 7],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
+    return model
+
+
+def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs):
+    model = ALTGVT(
+        patch_size=4,
+        embed_dims=[96, 192, 384, 768],
+        num_heads=[3, 6, 12, 24],
+        mlp_ratios=[4, 4, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 18, 2],
+        wss=[7, 7, 7, 7],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
+    return model
+
+
+def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs):
+    model = ALTGVT(
+        patch_size=4,
+        embed_dims=[128, 256, 512, 1024],
+        num_heads=[4, 8, 16, 32],
+        mlp_ratios=[4, 4, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 18, 2],
+        wss=[7, 7, 7, 7],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
+    return model

+ 293 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/hardnet.py

@@ -0,0 +1,293 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/PingoLH/Pytorch-HarDNet
+
+import paddle
+import paddle.nn as nn
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    'HarDNet39_ds':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
+    'HarDNet68_ds':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams',
+    'HarDNet68':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams',
+    'HarDNet85':
+    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def ConvLayer(in_channels,
+              out_channels,
+              kernel_size=3,
+              stride=1,
+              bias_attr=False):
+    layer = nn.Sequential(
+        ('conv', nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=kernel_size // 2,
+            groups=1,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
+        ('relu', nn.ReLU6()))
+    return layer
+
+
+def DWConvLayer(in_channels,
+                out_channels,
+                kernel_size=3,
+                stride=1,
+                bias_attr=False):
+    layer = nn.Sequential(
+        ('dwconv', nn.Conv2D(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=1,
+            groups=out_channels,
+            bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
+    return layer
+
+
+def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
+    layer = nn.Sequential(
+        ('layer1', ConvLayer(
+            in_channels, out_channels, kernel_size=kernel_size)),
+        ('layer2', DWConvLayer(
+            out_channels, out_channels, stride=stride)))
+    return layer
+
+
+class HarDBlock(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 growth_rate,
+                 grmul,
+                 n_layers,
+                 keepBase=False,
+                 residual_out=False,
+                 dwconv=False):
+        super().__init__()
+        self.keepBase = keepBase
+        self.links = []
+        layers_ = []
+        self.out_channels = 0  # if upsample else in_channels
+        for i in range(n_layers):
+            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
+                                              grmul)
+            self.links.append(link)
+            if dwconv:
+                layers_.append(CombConvLayer(inch, outch))
+            else:
+                layers_.append(ConvLayer(inch, outch))
+
+            if (i % 2 == 0) or (i == n_layers - 1):
+                self.out_channels += outch
+        # print("Blk out =",self.out_channels)
+        self.layers = nn.LayerList(layers_)
+
+    def get_link(self, layer, base_ch, growth_rate, grmul):
+        if layer == 0:
+            return base_ch, 0, []
+        out_channels = growth_rate
+
+        link = []
+        for i in range(10):
+            dv = 2**i
+            if layer % dv == 0:
+                k = layer - dv
+                link.append(k)
+                if i > 0:
+                    out_channels *= grmul
+
+        out_channels = int(int(out_channels + 1) / 2) * 2
+        in_channels = 0
+
+        for i in link:
+            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
+            in_channels += ch
+
+        return out_channels, in_channels, link
+
+    def forward(self, x):
+        layers_ = [x]
+
+        for layer in range(len(self.layers)):
+            link = self.links[layer]
+            tin = []
+            for i in link:
+                tin.append(layers_[i])
+            if len(tin) > 1:
+                x = paddle.concat(tin, 1)
+            else:
+                x = tin[0]
+            out = self.layers[layer](x)
+            layers_.append(out)
+
+        t = len(layers_)
+        out_ = []
+        for i in range(t):
+            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
+                out_.append(layers_[i])
+        out = paddle.concat(out_, 1)
+
+        return out
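`get_link` encodes HarDNet's "harmonic" wiring: layer `l` reads from layer `l - 2^i` for every power of two `2^i` that divides `l`, and its width is `growth_rate * grmul^i_max` rounded down to an even number, where `2^i_max` is the largest such power. A worked example with `growth_rate=14, grmul=1.7`, the first block of HarDNet68:

def links_and_width(layer, growth_rate=14, grmul=1.7):
    # Same arithmetic as HarDBlock.get_link, without the recursion.
    out_ch, link = growth_rate, []
    for i in range(10):
        if layer % (2 ** i) == 0:
            link.append(layer - 2 ** i)
            if i > 0:
                out_ch *= grmul
    return sorted(link), int(int(out_ch + 1) / 2) * 2  # links sorted for readability

for l in (1, 2, 4, 8):
    print(l, links_and_width(l))
# 1 ([0], 14)           only the previous layer
# 2 ([0, 1], 24)        14 * 1.7 ~ 23.8 -> even 24
# 4 ([0, 2, 3], 40)     14 * 1.7^2 ~ 40.5 -> 40
# 8 ([0, 4, 6, 7], 68)  14 * 1.7^3 ~ 68.8 -> 68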
+
+
+class HarDNet(nn.Layer):
+    def __init__(self,
+                 depth_wise=False,
+                 arch=85,
+                 class_num=1000,
+                 with_pool=True):
+        super().__init__()
+        first_ch = [32, 64]
+        second_kernel = 3
+        max_pool = True
+        grmul = 1.7
+        drop_rate = 0.1
+
+        # HarDNet68
+        ch_list = [128, 256, 320, 640, 1024]
+        gr = [14, 16, 20, 40, 160]
+        n_layers = [8, 16, 16, 16, 4]
+        downSamp = [1, 0, 1, 1, 0]
+
+        if arch == 85:
+            # HarDNet85
+            first_ch = [48, 96]
+            ch_list = [192, 256, 320, 480, 720, 1280]
+            gr = [24, 24, 28, 36, 48, 256]
+            n_layers = [8, 16, 16, 16, 16, 4]
+            downSamp = [1, 0, 1, 0, 1, 0]
+            drop_rate = 0.2
+
+        elif arch == 39:
+            # HarDNet39
+            first_ch = [24, 48]
+            ch_list = [96, 320, 640, 1024]
+            grmul = 1.6
+            gr = [16, 20, 64, 160]
+            n_layers = [4, 16, 8, 4]
+            downSamp = [1, 1, 1, 0]
+
+        if depth_wise:
+            second_kernel = 1
+            max_pool = False
+            drop_rate = 0.05
+
+        blks = len(n_layers)
+        self.base = nn.LayerList([])
+
+        # First Layer: Standard Conv3x3, Stride=2
+        self.base.append(
+            ConvLayer(
+                in_channels=3,
+                out_channels=first_ch[0],
+                kernel_size=3,
+                stride=2,
+                bias_attr=False))
+
+        # Second Layer
+        self.base.append(
+            ConvLayer(
+                first_ch[0], first_ch[1], kernel_size=second_kernel))
+
+        # Maxpooling or DWConv3x3 downsampling
+        if max_pool:
+            self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
+        else:
+            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
+
+        # Build all HarDNet blocks
+        ch = first_ch[1]
+        for i in range(blks):
+            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
+            ch = blk.out_channels
+            self.base.append(blk)
+
+            if i == blks - 1 and arch == 85:
+                self.base.append(nn.Dropout(0.1))
+
+            self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
+            ch = ch_list[i]
+            if downSamp[i] == 1:
+                if max_pool:
+                    self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
+                else:
+                    self.base.append(DWConvLayer(ch, ch, stride=2))
+
+        ch = ch_list[blks - 1]
+
+        layers = []
+
+        if with_pool:
+            layers.append(nn.AdaptiveAvgPool2D((1, 1)))
+
+        if class_num > 0:
+            layers.append(nn.Flatten())
+            layers.append(nn.Dropout(drop_rate))
+            layers.append(nn.Linear(ch, class_num))
+
+        self.base.append(nn.Sequential(*layers))
+
+    def forward(self, x):
+        for layer in self.base:
+            x = layer(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "The type of `pretrained` is not supported. Please pass a "
+            "`bool` or a `str` path to the pretrained weights.")
+
+
+def HarDNet39_ds(pretrained=False, **kwargs):
+    model = HarDNet(arch=39, depth_wise=True, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
+    return model
+
+
+def HarDNet68_ds(pretrained=False, **kwargs):
+    model = HarDNet(arch=68, depth_wise=True, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
+    return model
+
+
+def HarDNet68(pretrained=False, **kwargs):
+    model = HarDNet(arch=68, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
+    return model
+
+
+def HarDNet85(pretrained=False, **kwargs):
+    model = HarDNet(arch=85, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
+    return model
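A usage sketch for the factories above; with the default `with_pool=True` and `class_num > 0` the network keeps its pooling/classifier tail and returns plain logits:

import paddle

model = HarDNet68(pretrained=False, class_num=1000)
model.eval()

x = paddle.randn([1, 3, 224, 224])
with paddle.no_grad():
    logits = model(x)
print(logits.shape)  # [1, 1000]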

+ 477 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/inception_v4.py

@@ -0,0 +1,477 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "InceptionV4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 groups=1,
+                 act='relu',
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        bn_name = name + "_bn"
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class InceptionStem(nn.Layer):
+    def __init__(self):
+        super(InceptionStem, self).__init__()
+        self._conv_1 = ConvBNLayer(
+            3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2")
+        self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1")
+        self._conv_3 = ConvBNLayer(
+            32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1")
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+        self._conv2 = ConvBNLayer(
+            64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2")
+        self._conv1_1 = ConvBNLayer(
+            160, 64, 1, act="relu", name="inception_stem2_3x3_reduce")
+        self._conv1_2 = ConvBNLayer(
+            64, 96, 3, act="relu", name="inception_stem2_3x3")
+        self._conv2_1 = ConvBNLayer(
+            160, 64, 1, act="relu", name="inception_stem2_1x7_reduce")
+        self._conv2_2 = ConvBNLayer(
+            64,
+            64, (7, 1),
+            padding=(3, 0),
+            act="relu",
+            name="inception_stem2_1x7")
+        self._conv2_3 = ConvBNLayer(
+            64,
+            64, (1, 7),
+            padding=(0, 3),
+            act="relu",
+            name="inception_stem2_7x1")
+        self._conv2_4 = ConvBNLayer(
+            64, 96, 3, act="relu", name="inception_stem2_3x3_2")
+        self._conv3 = ConvBNLayer(
+            192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2")
+
+    def forward(self, inputs):
+        conv = self._conv_1(inputs)
+        conv = self._conv_2(conv)
+        conv = self._conv_3(conv)
+
+        pool1 = self._pool(conv)
+        conv2 = self._conv2(conv)
+        concat = paddle.concat([pool1, conv2], axis=1)
+
+        conv1 = self._conv1_1(concat)
+        conv1 = self._conv1_2(conv1)
+
+        conv2 = self._conv2_1(concat)
+        conv2 = self._conv2_2(conv2)
+        conv2 = self._conv2_3(conv2)
+        conv2 = self._conv2_4(conv2)
+
+        concat = paddle.concat([conv1, conv2], axis=1)
+
+        conv1 = self._conv3(concat)
+        pool1 = self._pool(concat)
+
+        concat = paddle.concat([conv1, pool1], axis=1)
+        return concat
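+
+
+# Channel bookkeeping for the stem, assuming a 3-channel input: the first
+# concat yields 64 + 96 = 160 channels, the second 96 + 96 = 192, and the
+# final one 192 + 192 = 384, which is what the InceptionA blocks expect.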
+
+
+class InceptionA(nn.Layer):
+    def __init__(self, name):
+        super(InceptionA, self).__init__()
+        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
+        self._conv1 = ConvBNLayer(
+            384, 96, 1, act="relu", name="inception_a" + name + "_1x1")
+        self._conv2 = ConvBNLayer(
+            384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2")
+        self._conv3_1 = ConvBNLayer(
+            384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce")
+        self._conv3_2 = ConvBNLayer(
+            64,
+            96,
+            3,
+            padding=1,
+            act="relu",
+            name="inception_a" + name + "_3x3")
+        self._conv4_1 = ConvBNLayer(
+            384,
+            64,
+            1,
+            act="relu",
+            name="inception_a" + name + "_3x3_2_reduce")
+        self._conv4_2 = ConvBNLayer(
+            64,
+            96,
+            3,
+            padding=1,
+            act="relu",
+            name="inception_a" + name + "_3x3_2")
+        self._conv4_3 = ConvBNLayer(
+            96,
+            96,
+            3,
+            padding=1,
+            act="relu",
+            name="inception_a" + name + "_3x3_3")
+
+    def forward(self, inputs):
+        pool1 = self._pool(inputs)
+        conv1 = self._conv1(pool1)
+
+        conv2 = self._conv2(inputs)
+
+        conv3 = self._conv3_1(inputs)
+        conv3 = self._conv3_2(conv3)
+
+        conv4 = self._conv4_1(inputs)
+        conv4 = self._conv4_2(conv4)
+        conv4 = self._conv4_3(conv4)
+
+        concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
+        return concat
+
+
+class ReductionA(nn.Layer):
+    def __init__(self):
+        super(ReductionA, self).__init__()
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+        self._conv2 = ConvBNLayer(
+            384, 384, 3, stride=2, act="relu", name="reduction_a_3x3")
+        self._conv3_1 = ConvBNLayer(
+            384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce")
+        self._conv3_2 = ConvBNLayer(
+            192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2")
+        self._conv3_3 = ConvBNLayer(
+            224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3")
+
+    def forward(self, inputs):
+        pool1 = self._pool(inputs)
+        conv2 = self._conv2(inputs)
+        conv3 = self._conv3_1(inputs)
+        conv3 = self._conv3_2(conv3)
+        conv3 = self._conv3_3(conv3)
+        concat = paddle.concat([pool1, conv2, conv3], axis=1)
+        return concat
+
+
+class InceptionB(nn.Layer):
+    def __init__(self, name=None):
+        super(InceptionB, self).__init__()
+        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
+        self._conv1 = ConvBNLayer(
+            1024, 128, 1, act="relu", name="inception_b" + name + "_1x1")
+        self._conv2 = ConvBNLayer(
+            1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2")
+        self._conv3_1 = ConvBNLayer(
+            1024,
+            192,
+            1,
+            act="relu",
+            name="inception_b" + name + "_1x7_reduce")
+        self._conv3_2 = ConvBNLayer(
+            192,
+            224, (1, 7),
+            padding=(0, 3),
+            act="relu",
+            name="inception_b" + name + "_1x7")
+        self._conv3_3 = ConvBNLayer(
+            224,
+            256, (7, 1),
+            padding=(3, 0),
+            act="relu",
+            name="inception_b" + name + "_7x1")
+        self._conv4_1 = ConvBNLayer(
+            1024,
+            192,
+            1,
+            act="relu",
+            name="inception_b" + name + "_7x1_2_reduce")
+        self._conv4_2 = ConvBNLayer(
+            192,
+            192, (1, 7),
+            padding=(0, 3),
+            act="relu",
+            name="inception_b" + name + "_1x7_2")
+        self._conv4_3 = ConvBNLayer(
+            192,
+            224, (7, 1),
+            padding=(3, 0),
+            act="relu",
+            name="inception_b" + name + "_7x1_2")
+        self._conv4_4 = ConvBNLayer(
+            224,
+            224, (1, 7),
+            padding=(0, 3),
+            act="relu",
+            name="inception_b" + name + "_1x7_3")
+        self._conv4_5 = ConvBNLayer(
+            224,
+            256, (7, 1),
+            padding=(3, 0),
+            act="relu",
+            name="inception_b" + name + "_7x1_3")
+
+    def forward(self, inputs):
+        pool1 = self._pool(inputs)
+        conv1 = self._conv1(pool1)
+
+        conv2 = self._conv2(inputs)
+
+        conv3 = self._conv3_1(inputs)
+        conv3 = self._conv3_2(conv3)
+        conv3 = self._conv3_3(conv3)
+
+        conv4 = self._conv4_1(inputs)
+        conv4 = self._conv4_2(conv4)
+        conv4 = self._conv4_3(conv4)
+        conv4 = self._conv4_4(conv4)
+        conv4 = self._conv4_5(conv4)
+
+        concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
+        return concat
+
+
+class ReductionB(nn.Layer):
+    def __init__(self):
+        super(ReductionB, self).__init__()
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+        self._conv2_1 = ConvBNLayer(
+            1024, 192, 1, act="relu", name="reduction_b_3x3_reduce")
+        self._conv2_2 = ConvBNLayer(
+            192, 192, 3, stride=2, act="relu", name="reduction_b_3x3")
+        self._conv3_1 = ConvBNLayer(
+            1024, 256, 1, act="relu", name="reduction_b_1x7_reduce")
+        self._conv3_2 = ConvBNLayer(
+            256,
+            256, (1, 7),
+            padding=(0, 3),
+            act="relu",
+            name="reduction_b_1x7")
+        self._conv3_3 = ConvBNLayer(
+            256,
+            320, (7, 1),
+            padding=(3, 0),
+            act="relu",
+            name="reduction_b_7x1")
+        self._conv3_4 = ConvBNLayer(
+            320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2")
+
+    def forward(self, inputs):
+        pool1 = self._pool(inputs)
+
+        conv2 = self._conv2_1(inputs)
+        conv2 = self._conv2_2(conv2)
+
+        conv3 = self._conv3_1(inputs)
+        conv3 = self._conv3_2(conv3)
+        conv3 = self._conv3_3(conv3)
+        conv3 = self._conv3_4(conv3)
+
+        concat = paddle.concat([pool1, conv2, conv3], axis=1)
+
+        return concat
+
+
+class InceptionC(nn.Layer):
+    def __init__(self, name=None):
+        super(InceptionC, self).__init__()
+        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
+        self._conv1 = ConvBNLayer(
+            1536, 256, 1, act="relu", name="inception_c" + name + "_1x1")
+        self._conv2 = ConvBNLayer(
+            1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2")
+        self._conv3_0 = ConvBNLayer(
+            1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3")
+        self._conv3_1 = ConvBNLayer(
+            384,
+            256, (1, 3),
+            padding=(0, 1),
+            act="relu",
+            name="inception_c" + name + "_1x3")
+        self._conv3_2 = ConvBNLayer(
+            384,
+            256, (3, 1),
+            padding=(1, 0),
+            act="relu",
+            name="inception_c" + name + "_3x1")
+        self._conv4_0 = ConvBNLayer(
+            1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4")
+        self._conv4_00 = ConvBNLayer(
+            384,
+            448, (1, 3),
+            padding=(0, 1),
+            act="relu",
+            name="inception_c" + name + "_1x3_2")
+        self._conv4_000 = ConvBNLayer(
+            448,
+            512, (3, 1),
+            padding=(1, 0),
+            act="relu",
+            name="inception_c" + name + "_3x1_2")
+        self._conv4_1 = ConvBNLayer(
+            512,
+            256, (1, 3),
+            padding=(0, 1),
+            act="relu",
+            name="inception_c" + name + "_1x3_3")
+        self._conv4_2 = ConvBNLayer(
+            512,
+            256, (3, 1),
+            padding=(1, 0),
+            act="relu",
+            name="inception_c" + name + "_3x1_3")
+
+    def forward(self, inputs):
+        pool1 = self._pool(inputs)
+        conv1 = self._conv1(pool1)
+
+        conv2 = self._conv2(inputs)
+
+        conv3 = self._conv3_0(inputs)
+        conv3_1 = self._conv3_1(conv3)
+        conv3_2 = self._conv3_2(conv3)
+
+        conv4 = self._conv4_0(inputs)
+        conv4 = self._conv4_00(conv4)
+        conv4 = self._conv4_000(conv4)
+        conv4_1 = self._conv4_1(conv4)
+        conv4_2 = self._conv4_2(conv4)
+
+        concat = paddle.concat(
+            [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1)
+
+        return concat
+
+
+class InceptionV4DY(nn.Layer):
+    def __init__(self, class_num=1000):
+        super(InceptionV4DY, self).__init__()
+        self._inception_stem = InceptionStem()
+
+        self._inceptionA_1 = InceptionA(name="1")
+        self._inceptionA_2 = InceptionA(name="2")
+        self._inceptionA_3 = InceptionA(name="3")
+        self._inceptionA_4 = InceptionA(name="4")
+        self._reductionA = ReductionA()
+
+        self._inceptionB_1 = InceptionB(name="1")
+        self._inceptionB_2 = InceptionB(name="2")
+        self._inceptionB_3 = InceptionB(name="3")
+        self._inceptionB_4 = InceptionB(name="4")
+        self._inceptionB_5 = InceptionB(name="5")
+        self._inceptionB_6 = InceptionB(name="6")
+        self._inceptionB_7 = InceptionB(name="7")
+        self._reductionB = ReductionB()
+
+        self._inceptionC_1 = InceptionC(name="1")
+        self._inceptionC_2 = InceptionC(name="2")
+        self._inceptionC_3 = InceptionC(name="3")
+
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self._drop = Dropout(p=0.2, mode="downscale_in_infer")
+        stdv = 1.0 / math.sqrt(1536 * 1.0)
+        self.out = Linear(
+            1536,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="final_fc_weights"),
+            bias_attr=ParamAttr(name="final_fc_offset"))
+
+    def forward(self, inputs):
+        x = self._inception_stem(inputs)
+
+        x = self._inceptionA_1(x)
+        x = self._inceptionA_2(x)
+        x = self._inceptionA_3(x)
+        x = self._inceptionA_4(x)
+        x = self._reductionA(x)
+
+        x = self._inceptionB_1(x)
+        x = self._inceptionB_2(x)
+        x = self._inceptionB_3(x)
+        x = self._inceptionB_4(x)
+        x = self._inceptionB_5(x)
+        x = self._inceptionB_6(x)
+        x = self._inceptionB_7(x)
+        x = self._reductionB(x)
+
+        x = self._inceptionC_1(x)
+        x = self._inceptionC_2(x)
+        x = self._inceptionC_3(x)
+
+        x = self.avg_pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self._drop(x)
+        x = self.out(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
+    model = InceptionV4DY(**kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
+    return model
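+
+
+# A minimal usage sketch, assuming only the factory above. 299x299 is the
+# canonical InceptionV4 input size; the adaptive average pool also tolerates
+# other sufficiently large inputs.
+if __name__ == "__main__":
+    net = InceptionV4(pretrained=False, class_num=1000)
+    net.eval()
+    logits = net(paddle.rand([1, 3, 299, 299]))
+    print(logits.shape)  # expected: [1, 1000]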

+ 589 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/levit.py

@@ -0,0 +1,589 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/facebookresearch/LeViT
+
+import itertools
+import math
+import warnings
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.initializer import TruncatedNormal, Constant
+from paddle.regularizer import L2Decay
+
+from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "LeViT_128S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
+    "LeViT_128":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
+    "LeViT_192":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
+    "LeViT_256":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
+    "LeViT_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def cal_attention_biases(attention_biases, attention_bias_idxs):
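+    # Shape note: attention_biases is (num_heads, num_offsets) and
+    # attention_bias_idxs is (N_out, N_in); each row gathers one learned bias
+    # per key position, giving (num_heads, N_out, N_in), which broadcasts
+    # against the (B, num_heads, N_out, N_in) attention logits.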
+    gather_list = []
+    attention_bias_t = paddle.transpose(attention_biases, (1, 0))
+    nums = attention_bias_idxs.shape[0]
+    for idx in range(nums):
+        gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx])
+        gather_list.append(gather)
+    shape0, shape1 = attention_bias_idxs.shape
+    gather = paddle.concat(gather_list)
+    return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1))
+
+
+class Conv2d_BN(nn.Sequential):
+    def __init__(self,
+                 a,
+                 b,
+                 ks=1,
+                 stride=1,
+                 pad=0,
+                 dilation=1,
+                 groups=1,
+                 bn_weight_init=1,
+                 resolution=-10000):
+        super().__init__()
+        self.add_sublayer(
+            'c',
+            nn.Conv2D(
+                a, b, ks, stride, pad, dilation, groups, bias_attr=False))
+        bn = nn.BatchNorm2D(b)
+        # Honor bn_weight_init (0 is used to zero-init residual branches),
+        # matching Linear_BN below; all current call sites pass the default 1.
+        if bn_weight_init == 0:
+            zeros_(bn.weight)
+        else:
+            ones_(bn.weight)
+        zeros_(bn.bias)
+        self.add_sublayer('bn', bn)
+
+
+class Linear_BN(nn.Sequential):
+    def __init__(self, a, b, bn_weight_init=1):
+        super().__init__()
+        self.add_sublayer('c', nn.Linear(a, b, bias_attr=False))
+        bn = nn.BatchNorm1D(b)
+        if bn_weight_init == 0:
+            zeros_(bn.weight)
+        else:
+            ones_(bn.weight)
+        zeros_(bn.bias)
+        self.add_sublayer('bn', bn)
+
+    def forward(self, x):
+        l, bn = self._sub_layers.values()
+        x = l(x)
+        return paddle.reshape(bn(x.flatten(0, 1)), x.shape)
+
+
+class BN_Linear(nn.Sequential):
+    def __init__(self, a, b, bias=True, std=0.02):
+        super().__init__()
+        self.add_sublayer('bn', nn.BatchNorm1D(a))
+        l = nn.Linear(a, b, bias_attr=bias)
+        trunc_normal_(l.weight)
+        if bias:
+            zeros_(l.bias)
+        self.add_sublayer('l', l)
+
+
+def b16(n, activation, resolution=224):
+    return nn.Sequential(
+        Conv2d_BN(
+            3, n // 8, 3, 2, 1, resolution=resolution),
+        activation(),
+        Conv2d_BN(
+            n // 8, n // 4, 3, 2, 1, resolution=resolution // 2),
+        activation(),
+        Conv2d_BN(
+            n // 4, n // 2, 3, 2, 1, resolution=resolution // 4),
+        activation(),
+        Conv2d_BN(
+            n // 2, n, 3, 2, 1, resolution=resolution // 8))
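+
+
+# Note on b16: the four stride-2 ConvBNs reduce a 224x224 input by a factor
+# of 16 (224 -> 14 per side) while widening channels 3 -> n//8 -> n//4 ->
+# n//2 -> n, so this small CNN plays the role of a 16x16-patch embedding.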
+
+
+class Residual(nn.Layer):
+    def __init__(self, m, drop):
+        super().__init__()
+        self.m = m
+        self.drop = drop
+
+    def forward(self, x):
+        if self.training and self.drop > 0:
+            # Stochastic depth: keep the residual branch per sample with
+            # probability 1 - drop, rescaling kept branches by 1 / (1 - drop)
+            # so the expected output matches eval-time behavior.
+            mask = paddle.rand(
+                shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32")
+            mask = mask.divide(paddle.full_like(mask, 1 - self.drop))
+            return paddle.add(x, self.m(x) * mask)
+        else:
+            return paddle.add(x, self.m(x))
+
+
+class Attention(nn.Layer):
+    def __init__(self,
+                 dim,
+                 key_dim,
+                 num_heads=8,
+                 attn_ratio=4,
+                 activation=None,
+                 resolution=14):
+        super().__init__()
+        self.num_heads = num_heads
+        self.scale = key_dim**-0.5
+        self.key_dim = key_dim
+        self.nh_kd = nh_kd = key_dim * num_heads
+        self.d = int(attn_ratio * key_dim)
+        self.dh = int(attn_ratio * key_dim) * num_heads
+        self.attn_ratio = attn_ratio
+        self.h = self.dh + nh_kd * 2
+        self.qkv = Linear_BN(dim, self.h)
+        self.proj = nn.Sequential(
+            activation(), Linear_BN(
+                self.dh, dim, bn_weight_init=0))
+        points = list(itertools.product(range(resolution), range(resolution)))
+        N = len(points)
+        attention_offsets = {}
+        idxs = []
+        for p1 in points:
+            for p2 in points:
+                offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
+                if offset not in attention_offsets:
+                    attention_offsets[offset] = len(attention_offsets)
+                idxs.append(attention_offsets[offset])
+        self.attention_biases = self.create_parameter(
+            shape=(num_heads, len(attention_offsets)),
+            default_initializer=zeros_,
+            attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
+        tensor_idxs = paddle.to_tensor(idxs, dtype='int64')
+        self.register_buffer('attention_bias_idxs',
+                             paddle.reshape(tensor_idxs, [N, N]))
+
+    @paddle.no_grad()
+    def train(self, mode=True):
+        if mode:
+            super().train()
+        else:
+            super().eval()
+        if mode and hasattr(self, 'ab'):
+            del self.ab
+        else:
+            self.ab = cal_attention_biases(self.attention_biases,
+                                           self.attention_bias_idxs)
+
+    def forward(self, x):
+        B, N, C = x.shape
+        qkv = self.qkv(x)
+        qkv = paddle.reshape(qkv,
+                             [B, N, self.num_heads, self.h // self.num_heads])
+        q, k, v = paddle.split(
+            qkv, [self.key_dim, self.key_dim, self.d], axis=3)
+        q = paddle.transpose(q, perm=[0, 2, 1, 3])
+        k = paddle.transpose(k, perm=[0, 2, 1, 3])
+        v = paddle.transpose(v, perm=[0, 2, 1, 3])
+        k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2])
+
+        # Recompute relative-position biases during training; in eval, reuse
+        # the cache built by train(False) when it exists.
+        if self.training or not hasattr(self, "ab"):
+            attention_biases = cal_attention_biases(self.attention_biases,
+                                                    self.attention_bias_idxs)
+        else:
+            attention_biases = self.ab
+        attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases)
+        attn = F.softmax(attn)
+        x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3])
+        x = paddle.reshape(x, [B, N, self.dh])
+        x = self.proj(x)
+        return x
+
+
+class Subsample(nn.Layer):
+    def __init__(self, stride, resolution):
+        super().__init__()
+        self.stride = stride
+        self.resolution = resolution
+
+    def forward(self, x):
+        B, N, C = x.shape
+        x = paddle.reshape(x, [B, self.resolution, self.resolution, C])
+        end1, end2 = x.shape[1], x.shape[2]
+        x = x[:, 0:end1:self.stride, 0:end2:self.stride]
+        x = paddle.reshape(x, [B, -1, C])
+        return x
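+
+
+# Shape illustration, assuming resolution=14 and stride=2: an input of
+# (B, 196, C) is viewed as a 14x14 token grid, every second row and column
+# is kept, and the result is flattened back to (B, 49, C).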
+
+
+class AttentionSubsample(nn.Layer):
+    def __init__(self,
+                 in_dim,
+                 out_dim,
+                 key_dim,
+                 num_heads=8,
+                 attn_ratio=2,
+                 activation=None,
+                 stride=2,
+                 resolution=14,
+                 resolution_=7):
+        super().__init__()
+        self.num_heads = num_heads
+        self.scale = key_dim**-0.5
+        self.key_dim = key_dim
+        self.nh_kd = nh_kd = key_dim * num_heads
+        self.d = int(attn_ratio * key_dim)
+        self.dh = int(attn_ratio * key_dim) * self.num_heads
+        self.attn_ratio = attn_ratio
+        self.resolution_ = resolution_
+        self.resolution_2 = resolution_**2
+        h = self.dh + nh_kd
+        self.kv = Linear_BN(in_dim, h)
+
+        self.q = nn.Sequential(
+            Subsample(stride, resolution), Linear_BN(in_dim, nh_kd))
+        self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim))
+
+        self.stride = stride
+        self.resolution = resolution
+        points = list(itertools.product(range(resolution), range(resolution)))
+        points_ = list(
+            itertools.product(range(resolution_), range(resolution_)))
+
+        N = len(points)
+        N_ = len(points_)
+        attention_offsets = {}
+        idxs = []
+        i = 0
+        j = 0
+        for p1 in points_:
+            i += 1
+            for p2 in points:
+                j += 1
+                size = 1
+                offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2),
+                          abs(p1[1] * stride - p2[1] + (size - 1) / 2))
+                if offset not in attention_offsets:
+                    attention_offsets[offset] = len(attention_offsets)
+                idxs.append(attention_offsets[offset])
+        self.attention_biases = self.create_parameter(
+            shape=(num_heads, len(attention_offsets)),
+            default_initializer=zeros_,
+            attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
+
+        tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64')
+        self.register_buffer('attention_bias_idxs',
+                             paddle.reshape(tensor_idxs_, [N_, N]))
+
+    @paddle.no_grad()
+    def train(self, mode=True):
+        if mode:
+            super().train()
+        else:
+            super().eval()
+        if mode and hasattr(self, 'ab'):
+            del self.ab
+        else:
+            self.ab = cal_attention_biases(self.attention_biases,
+                                           self.attention_bias_idxs)
+
+    def forward(self, x):
+        B, N, C = x.shape
+        kv = self.kv(x)
+        kv = paddle.reshape(kv, [B, N, self.num_heads, -1])
+        k, v = paddle.split(kv, [self.key_dim, self.d], axis=3)
+        k = paddle.transpose(k, perm=[0, 2, 1, 3])  # BHNC
+        v = paddle.transpose(v, perm=[0, 2, 1, 3])
+        q = paddle.reshape(
+            self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim])
+        q = paddle.transpose(q, perm=[0, 2, 1, 3])
+
+        # Recompute relative-position biases during training; in eval, reuse
+        # the cache built by train(False) when it exists.
+        if self.training or not hasattr(self, "ab"):
+            attention_biases = cal_attention_biases(self.attention_biases,
+                                                    self.attention_bias_idxs)
+        else:
+            attention_biases = self.ab
+
+        attn = (paddle.matmul(
+            q, paddle.transpose(
+                k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases
+        attn = F.softmax(attn)
+
+        x = paddle.reshape(
+            paddle.transpose(
+                paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
+        x = self.proj(x)
+        return x
+
+
+class LeViT(nn.Layer):
+    """ Vision Transformer with support for patch or hybrid CNN input stage
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dim=[192],
+                 key_dim=[64],
+                 depth=[12],
+                 num_heads=[3],
+                 attn_ratio=[2],
+                 mlp_ratio=[2],
+                 hybrid_backbone=None,
+                 down_ops=None,
+                 attention_activation=nn.Hardswish,
+                 mlp_activation=nn.Hardswish,
+                 distillation=True,
+                 drop_path=0):
+        super().__init__()
+
+        self.class_num = class_num
+        self.num_features = embed_dim[-1]
+        self.embed_dim = embed_dim
+        self.distillation = distillation
+
+        self.patch_embed = hybrid_backbone
+
+        self.blocks = []
+        # Copy to avoid the shared mutable-default pitfall and to keep the
+        # caller's list unmodified.
+        down_ops = list(down_ops) if down_ops is not None else []
+        down_ops.append([''])
+        resolution = img_size // patch_size
+        for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate(
+                zip(embed_dim, key_dim, depth, num_heads, attn_ratio,
+                    mlp_ratio, down_ops)):
+            for _ in range(dpth):
+                self.blocks.append(
+                    Residual(
+                        Attention(
+                            ed,
+                            kd,
+                            nh,
+                            attn_ratio=ar,
+                            activation=attention_activation,
+                            resolution=resolution, ),
+                        drop_path))
+                if mr > 0:
+                    h = int(ed * mr)
+                    self.blocks.append(
+                        Residual(
+                            nn.Sequential(
+                                Linear_BN(ed, h),
+                                mlp_activation(),
+                                Linear_BN(
+                                    h, ed, bn_weight_init=0), ),
+                            drop_path))
+            if do[0] == 'Subsample':
+                #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
+                resolution_ = (resolution - 1) // do[5] + 1
+                self.blocks.append(
+                    AttentionSubsample(
+                        *embed_dim[i:i + 2],
+                        key_dim=do[1],
+                        num_heads=do[2],
+                        attn_ratio=do[3],
+                        activation=attention_activation,
+                        stride=do[5],
+                        resolution=resolution,
+                        resolution_=resolution_))
+                resolution = resolution_
+                if do[4] > 0:  # mlp_ratio
+                    h = int(embed_dim[i + 1] * do[4])
+                    self.blocks.append(
+                        Residual(
+                            nn.Sequential(
+                                Linear_BN(embed_dim[i + 1], h),
+                                mlp_activation(),
+                                Linear_BN(
+                                    h, embed_dim[i + 1], bn_weight_init=0), ),
+                            drop_path))
+        self.blocks = nn.Sequential(*self.blocks)
+
+        # Classifier head
+        self.head = BN_Linear(embed_dim[-1],
+                              class_num) if class_num > 0 else Identity()
+        if distillation:
+            self.head_dist = BN_Linear(
+                embed_dim[-1], class_num) if class_num > 0 else Identity()
+
+    def forward(self, x):
+        x = self.patch_embed(x)
+        x = x.flatten(2)
+        x = paddle.transpose(x, perm=[0, 2, 1])
+        x = self.blocks(x)
+        x = x.mean(1)
+
+        x = paddle.reshape(x, [-1, self.embed_dim[-1]])
+        if self.distillation:
+            x = self.head(x), self.head_dist(x)
+            if not self.training:
+                x = (x[0] + x[1]) / 2
+        else:
+            x = self.head(x)
+        return x
+
+
+def model_factory(C, D, X, N, drop_path, class_num, distillation):
+    embed_dim = [int(x) for x in C.split('_')]
+    num_heads = [int(x) for x in N.split('_')]
+    depth = [int(x) for x in X.split('_')]
+    act = nn.Hardswish
+    model = LeViT(
+        patch_size=16,
+        embed_dim=embed_dim,
+        num_heads=num_heads,
+        key_dim=[D] * 3,
+        depth=depth,
+        attn_ratio=[2, 2, 2],
+        mlp_ratio=[2, 2, 2],
+        down_ops=[
+            #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
+            ['Subsample', D, embed_dim[0] // D, 4, 2, 2],
+            ['Subsample', D, embed_dim[1] // D, 4, 2, 2],
+        ],
+        attention_activation=act,
+        mlp_activation=act,
+        hybrid_backbone=b16(embed_dim[0], activation=act),
+        class_num=class_num,
+        drop_path=drop_path,
+        distillation=distillation)
+
+    return model
+
+
+specification = {
+    'LeViT_128S': {
+        'C': '128_256_384',
+        'D': 16,
+        'N': '4_6_8',
+        'X': '2_3_4',
+        'drop_path': 0
+    },
+    'LeViT_128': {
+        'C': '128_256_384',
+        'D': 16,
+        'N': '4_8_12',
+        'X': '4_4_4',
+        'drop_path': 0
+    },
+    'LeViT_192': {
+        'C': '192_288_384',
+        'D': 32,
+        'N': '3_5_6',
+        'X': '4_4_4',
+        'drop_path': 0
+    },
+    'LeViT_256': {
+        'C': '256_384_512',
+        'D': 32,
+        'N': '4_6_8',
+        'X': '4_4_4',
+        'drop_path': 0
+    },
+    'LeViT_384': {
+        'C': '384_512_768',
+        'D': 32,
+        'N': '6_9_12',
+        'X': '4_4_4',
+        'drop_path': 0.1
+    },
+}
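+
+
+# How model_factory expands a row of the table above, taking 'LeViT_128S'
+# as the worked example: embed_dim [128, 256, 384], num_heads [4, 6, 8],
+# depth [2, 3, 4], key_dim [16, 16, 16], and two 'Subsample' stages with
+# embed_dim[i] // D = 8 and 16 heads respectively.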
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def LeViT_128S(pretrained=False,
+               use_ssld=False,
+               class_num=1000,
+               distillation=False,
+               **kwargs):
+    model = model_factory(
+        **specification['LeViT_128S'],
+        class_num=class_num,
+        distillation=distillation)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
+    return model
+
+
+def LeViT_128(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
+    model = model_factory(
+        **specification['LeViT_128'],
+        class_num=class_num,
+        distillation=distillation)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
+    return model
+
+
+def LeViT_192(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
+    model = model_factory(
+        **specification['LeViT_192'],
+        class_num=class_num,
+        distillation=distillation)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
+    return model
+
+
+def LeViT_256(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
+    model = model_factory(
+        **specification['LeViT_256'],
+        class_num=class_num,
+        distillation=distillation)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
+    return model
+
+
+def LeViT_384(pretrained=False,
+              use_ssld=False,
+              class_num=1000,
+              distillation=False,
+              **kwargs):
+    model = model_factory(
+        **specification['LeViT_384'],
+        class_num=class_num,
+        distillation=distillation)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
+    return model
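+
+
+# A minimal usage sketch, assuming only the factories above. LeViT expects
+# 224x224 inputs (img_size // patch_size = 14 tokens per side); with
+# distillation enabled, the two heads are averaged at eval time, so the
+# output is a single [batch, class_num] tensor either way.
+if __name__ == "__main__":
+    net = LeViT_128S(pretrained=False, class_num=1000)
+    net.eval()
+    logits = net(paddle.rand([1, 3, 224, 224]))
+    print(logits.shape)  # expected: [1, 1000]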

+ 815 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/mixnet.py

@@ -0,0 +1,815 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+    MixNet for ImageNet-1K, implemented in Paddle.
+    Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+"""
+
+import os
+from inspect import isfunction
+from functools import reduce
+import paddle
+import paddle.nn as nn
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "MixNet_S":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
+    "MixNet_M":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
+    "MixNet_L":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class Identity(nn.Layer):
+    """
+    Identity block.
+    """
+
+    def __init__(self):
+        super(Identity, self).__init__()
+
+    def forward(self, x):
+        return x
+
+
+def round_channels(channels, divisor=8):
+    """
+    Round weighted channel number (make divisible operation).
+
+    Parameters:
+    ----------
+    channels : int or float
+        Original number of channels.
+    divisor : int, default 8
+        Alignment value.
+
+    Returns:
+    -------
+    int
+        Weighted number of channels.
+    """
+    rounded_channels = max(
+        int(channels + divisor / 2.0) // divisor * divisor, divisor)
+    if float(rounded_channels) < 0.9 * channels:
+        rounded_channels += divisor
+    return rounded_channels
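+
+
+# Illustrative values with the default divisor of 8: rounding goes to the
+# nearest multiple, and the guard re-adds a divisor if rounding down would
+# shrink the width by more than 10%.
+#   round_channels(17)   -> 16
+#   round_channels(20)   -> 24
+#   round_channels(90.0) -> 88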
+
+
+def get_activation_layer(activation):
+    """
+    Create activation layer from string/function.
+
+    Parameters:
+    ----------
+    activation : function, or str, or nn.Module
+        Activation function or name of activation function.
+
+    Returns:
+    -------
+    nn.Module
+        Activation layer.
+    """
+    assert activation is not None
+    if isfunction(activation):
+        return activation()
+    elif isinstance(activation, str):
+        if activation == "relu":
+            return nn.ReLU()
+        elif activation == "relu6":
+            return nn.ReLU6()
+        elif activation == "swish":
+            return nn.Swish()
+        elif activation == "hswish":
+            return nn.Hardswish()
+        elif activation == "sigmoid":
+            return nn.Sigmoid()
+        elif activation == "hsigmoid":
+            return nn.Hardsigmoid()
+        elif activation == "identity":
+            return Identity()
+        else:
+            raise NotImplementedError()
+    else:
+        assert isinstance(activation, nn.Layer)
+        return activation
+
+
+class ConvBlock(nn.Layer):
+    """
+    Standard convolution block with Batch normalization and activation.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    kernel_size : int or tuple/list of 2 int
+        Convolution window size.
+    stride : int or tuple/list of 2 int
+        Strides of the convolution.
+    padding : int, or tuple/list of 2 int, or tuple/list of 4 int
+        Padding value for convolution layer.
+    dilation : int or tuple/list of 2 int, default 1
+        Dilation value for convolution layer.
+    groups : int, default 1
+        Number of groups.
+    bias : bool, default False
+        Whether the layer uses a bias vector.
+    use_bn : bool, default True
+        Whether to use BatchNorm layer.
+    bn_eps : float, default 1e-5
+        Small float added to variance in Batch norm.
+    activation : function or str or None, default nn.ReLU()
+        Activation function or name of activation function.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 dilation=1,
+                 groups=1,
+                 bias=False,
+                 use_bn=True,
+                 bn_eps=1e-5,
+                 activation=nn.ReLU()):
+        super(ConvBlock, self).__init__()
+        self.activate = (activation is not None)
+        self.use_bn = use_bn
+        self.use_pad = (isinstance(padding, (list, tuple)) and
+                        (len(padding) == 4))
+
+        if self.use_pad:
+            self.pad = padding
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias_attr=bias,
+            weight_attr=None)
+        if self.use_bn:
+            self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
+        if self.activate:
+            self.activ = get_activation_layer(activation)
+
+    def forward(self, x):
+        x = self.conv(x)
+        if self.use_bn:
+            x = self.bn(x)
+        if self.activate:
+            x = self.activ(x)
+        return x
+
+
+class SEBlock(nn.Layer):
+    def __init__(self,
+                 channels,
+                 reduction=16,
+                 mid_channels=None,
+                 round_mid=False,
+                 use_conv=True,
+                 mid_activation=nn.ReLU(),
+                 out_activation=nn.Sigmoid()):
+        super(SEBlock, self).__init__()
+        self.use_conv = use_conv
+        if mid_channels is None:
+            mid_channels = channels // reduction if not round_mid else round_channels(
+                float(channels) / reduction)
+
+        self.pool = nn.AdaptiveAvgPool2D(output_size=1)
+        if use_conv:
+            self.conv1 = nn.Conv2D(
+                in_channels=channels,
+                out_channels=mid_channels,
+                kernel_size=1,
+                stride=1,
+                groups=1,
+                bias_attr=True,
+                weight_attr=None)
+
+        else:
+            self.fc1 = nn.Linear(
+                in_features=channels, out_features=mid_channels)
+        self.activ = get_activation_layer(mid_activation)
+        if use_conv:
+            self.conv2 = nn.Conv2D(
+                in_channels=mid_channels,
+                out_channels=channels,
+                kernel_size=1,
+                stride=1,
+                groups=1,
+                bias_attr=True,
+                weight_attr=None)
+        else:
+            self.fc2 = nn.Linear(
+                in_features=mid_channels, out_features=channels)
+        self.sigmoid = get_activation_layer(out_activation)
+
+    def forward(self, x):
+        w = self.pool(x)
+        if not self.use_conv:
+            w = w.reshape(shape=[w.shape[0], -1])
+        w = self.conv1(w) if self.use_conv else self.fc1(w)
+        w = self.activ(w)
+        w = self.conv2(w) if self.use_conv else self.fc2(w)
+        w = self.sigmoid(w)
+        if not self.use_conv:
+            w = w.unsqueeze(2).unsqueeze(3)
+        x = x * w
+        return x
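+
+
+# SEBlock recap, following the layers above: global-average-pool to
+# (B, C, 1, 1), squeeze to roughly C // reduction channels, expand back to C,
+# and use the sigmoid output as a per-channel gate on the input features.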
+
+
+class MixConv(nn.Layer):
+    """
+    Mixed convolution layer from 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
+        Convolution window size.
+    stride : int or tuple/list of 2 int
+        Strides of the convolution.
+    padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
+        Padding value for convolution layer.
+    dilation : int or tuple/list of 2 int, default 1
+        Dilation value for convolution layer.
+    groups : int, default 1
+        Number of groups.
+    bias : bool, default False
+        Whether the layer uses a bias vector.
+    axis : int, default 1
+        The axis on which to concatenate the outputs.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 dilation=1,
+                 groups=1,
+                 bias=False,
+                 axis=1):
+        super(MixConv, self).__init__()
+        kernel_size = kernel_size if isinstance(kernel_size,
+                                                list) else [kernel_size]
+        padding = padding if isinstance(padding, list) else [padding]
+        kernel_count = len(kernel_size)
+        self.splitted_in_channels = self.split_channels(in_channels,
+                                                        kernel_count)
+        splitted_out_channels = self.split_channels(out_channels, kernel_count)
+        for i, kernel_size_i in enumerate(kernel_size):
+            in_channels_i = self.splitted_in_channels[i]
+            out_channels_i = splitted_out_channels[i]
+            padding_i = padding[i]
+            _ = self.add_sublayer(
+                name=str(i),
+                sublayer=nn.Conv2D(
+                    in_channels=in_channels_i,
+                    out_channels=out_channels_i,
+                    kernel_size=kernel_size_i,
+                    stride=stride,
+                    padding=padding_i,
+                    dilation=dilation,
+                    groups=(out_channels_i
+                            if out_channels == groups else groups),
+                    bias_attr=bias,
+                    weight_attr=None))
+        self.axis = axis
+
+    def forward(self, x):
+        xx = paddle.split(x, self.splitted_in_channels, axis=self.axis)
+        out = [
+            conv_i(x_i) for x_i, conv_i in zip(xx, self._sub_layers.values())
+        ]
+        x = paddle.concat(tuple(out), axis=self.axis)
+        return x
+
+    @staticmethod
+    def split_channels(channels, kernel_count):
+        splitted_channels = [channels // kernel_count] * kernel_count
+        splitted_channels[0] += channels - sum(splitted_channels)
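+        # e.g. split_channels(26, 3) -> [10, 8, 8]: the remainder is added to
+        # the first group so the parts always sum to `channels`.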
+        return splitted_channels
+
+
+class MixConvBlock(nn.Layer):
+    """
+    Mixed convolution block with Batch normalization and activation.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
+        Convolution window size.
+    stride : int or tuple/list of 2 int
+        Strides of the convolution.
+    padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
+        Padding value for convolution layer.
+    dilation : int or tuple/list of 2 int, default 1
+        Dilation value for convolution layer.
+    groups : int, default 1
+        Number of groups.
+    bias : bool, default False
+        Whether the layer uses a bias vector.
+    use_bn : bool, default True
+        Whether to use BatchNorm layer.
+    bn_eps : float, default 1e-5
+        Small float added to variance in Batch norm.
+    activation : function or str or None, default nn.ReLU()
+        Activation function or name of activation function.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 dilation=1,
+                 groups=1,
+                 bias=False,
+                 use_bn=True,
+                 bn_eps=1e-5,
+                 activation=nn.ReLU()):
+        super(MixConvBlock, self).__init__()
+        self.activate = (activation is not None)
+        self.use_bn = use_bn
+
+        self.conv = MixConv(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias)
+        if self.use_bn:
+            self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
+        if self.activate:
+            self.activ = get_activation_layer(activation)
+
+    def forward(self, x):
+        x = self.conv(x)
+        if self.use_bn:
+            x = self.bn(x)
+        if self.activate:
+            x = self.activ(x)
+        return x
+
+
+def mixconv1x1_block(in_channels,
+                     out_channels,
+                     kernel_count,
+                     stride=1,
+                     groups=1,
+                     bias=False,
+                     use_bn=True,
+                     bn_eps=1e-5,
+                     activation=nn.ReLU()):
+    """
+    1x1 version of the mixed convolution block.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    kernel_count : int
+        Kernel count.
+    stride : int or tuple/list of 2 int, default 1
+        Strides of the convolution.
+    groups : int, default 1
+        Number of groups.
+    bias : bool, default False
+        Whether the layer uses a bias vector.
+    use_bn : bool, default True
+        Whether to use BatchNorm layer.
+    bn_eps : float, default 1e-5
+        Small float added to variance in Batch norm.
+    activation : function or str, or None, default nn.ReLU()
+        Activation function or name of activation function.
+    """
+    return MixConvBlock(
+        in_channels=in_channels,
+        out_channels=out_channels,
+        kernel_size=([1] * kernel_count),
+        stride=stride,
+        padding=([0] * kernel_count),
+        groups=groups,
+        bias=bias,
+        use_bn=use_bn,
+        bn_eps=bn_eps,
+        activation=activation)
+
+
+class MixUnit(nn.Layer):
+    """
+    MixNet unit.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    stride : int or tuple/list of 2 int
+        Strides of the second convolution layer.
+    exp_kernel_count : int
+        Expansion convolution kernel count for each unit.
+    conv1_kernel_count : int
+        Conv1 kernel count for each unit.
+    conv2_kernel_count : int
+        Conv2 kernel count for each unit.
+    exp_factor : int
+        Expansion factor for each unit.
+    se_factor : int
+        SE reduction factor for each unit.
+    activation : str
+        Activation function or name of activation function.
+    """
+
+    def __init__(self, in_channels, out_channels, stride, exp_kernel_count,
+                 conv1_kernel_count, conv2_kernel_count, exp_factor, se_factor,
+                 activation):
+        super(MixUnit, self).__init__()
+        assert exp_factor >= 1
+        assert se_factor >= 0
+        self.residual = (in_channels == out_channels) and (stride == 1)
+        self.use_se = se_factor > 0
+        mid_channels = exp_factor * in_channels
+        self.use_exp_conv = exp_factor > 1
+
+        if self.use_exp_conv:
+            if exp_kernel_count == 1:
+                self.exp_conv = ConvBlock(
+                    in_channels=in_channels,
+                    out_channels=mid_channels,
+                    kernel_size=1,
+                    stride=1,
+                    padding=0,
+                    groups=1,
+                    bias=False,
+                    use_bn=True,
+                    bn_eps=1e-5,
+                    activation=activation)
+            else:
+                self.exp_conv = mixconv1x1_block(
+                    in_channels=in_channels,
+                    out_channels=mid_channels,
+                    kernel_count=exp_kernel_count,
+                    activation=activation)
+        if conv1_kernel_count == 1:
+            self.conv1 = ConvBlock(
+                in_channels=mid_channels,
+                out_channels=mid_channels,
+                kernel_size=3,
+                stride=stride,
+                padding=1,
+                dilation=1,
+                groups=mid_channels,
+                bias=False,
+                use_bn=True,
+                bn_eps=1e-5,
+                activation=activation)
+        else:
+            self.conv1 = MixConvBlock(
+                in_channels=mid_channels,
+                out_channels=mid_channels,
+                kernel_size=[3 + 2 * i for i in range(conv1_kernel_count)],
+                stride=stride,
+                padding=[1 + i for i in range(conv1_kernel_count)],
+                groups=mid_channels,
+                activation=activation)
+        if self.use_se:
+            self.se = SEBlock(
+                channels=mid_channels,
+                reduction=(exp_factor * se_factor),
+                round_mid=False,
+                mid_activation=activation)
+        if conv2_kernel_count == 1:
+            self.conv2 = ConvBlock(
+                in_channels=mid_channels,
+                out_channels=out_channels,
+                activation=None,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                groups=1,
+                bias=False,
+                use_bn=True,
+                bn_eps=1e-5)
+        else:
+            self.conv2 = mixconv1x1_block(
+                in_channels=mid_channels,
+                out_channels=out_channels,
+                kernel_count=conv2_kernel_count,
+                activation=None)
+
+    def forward(self, x):
+        if self.residual:
+            identity = x
+        if self.use_exp_conv:
+            x = self.exp_conv(x)
+        x = self.conv1(x)
+        if self.use_se:
+            x = self.se(x)
+        x = self.conv2(x)
+        if self.residual:
+            x = x + identity
+        return x
+
+
+class MixInitBlock(nn.Layer):
+    """
+    MixNet specific initial block.
+
+    Parameters:
+    ----------
+    in_channels : int
+        Number of input channels.
+    out_channels : int
+        Number of output channels.
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super(MixInitBlock, self).__init__()
+        self.conv1 = ConvBlock(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            stride=2,
+            kernel_size=3,
+            padding=1)
+        self.conv2 = MixUnit(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            stride=1,
+            exp_kernel_count=1,
+            conv1_kernel_count=1,
+            conv2_kernel_count=1,
+            exp_factor=1,
+            se_factor=0,
+            activation="relu")
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        return x
+
+
+class MixNet(nn.Layer):
+    """
+    MixNet model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+
+    Parameters:
+    ----------
+    channels : list of list of int
+        Number of output channels for each unit.
+    init_block_channels : int
+        Number of output channels for the initial unit.
+    final_block_channels : int
+        Number of output channels for the final block of the feature extractor.
+    exp_kernel_counts : list of list of int
+        Expansion convolution kernel count for each unit.
+    conv1_kernel_counts : list of list of int
+        Conv1 kernel count for each unit.
+    conv2_kernel_counts : list of list of int
+        Conv2 kernel count for each unit.
+    exp_factors : list of list of int
+        Expansion factor for each unit.
+    se_factors : list of list of int
+        SE reduction factor for each unit.
+    in_channels : int, default 3
+        Number of input channels.
+    in_size : tuple of two ints, default (224, 224)
+        Spatial size of the expected input image.
+    class_num : int, default 1000
+        Number of classification classes.
+    """
+
+    def __init__(self,
+                 channels,
+                 init_block_channels,
+                 final_block_channels,
+                 exp_kernel_counts,
+                 conv1_kernel_counts,
+                 conv2_kernel_counts,
+                 exp_factors,
+                 se_factors,
+                 in_channels=3,
+                 in_size=(224, 224),
+                 class_num=1000):
+        super(MixNet, self).__init__()
+        self.in_size = in_size
+        self.class_num = class_num
+
+        self.features = nn.Sequential()
+        self.features.add_sublayer(
+            "init_block",
+            MixInitBlock(
+                in_channels=in_channels, out_channels=init_block_channels))
+        in_channels = init_block_channels
+        for i, channels_per_stage in enumerate(channels):
+            stage = nn.Sequential()
+            for j, out_channels in enumerate(channels_per_stage):
+                stride = 2 if ((j == 0) and (i != 3)) or (
+                    (j == len(channels_per_stage) // 2) and (i == 3)) else 1
+                exp_kernel_count = exp_kernel_counts[i][j]
+                conv1_kernel_count = conv1_kernel_counts[i][j]
+                conv2_kernel_count = conv2_kernel_counts[i][j]
+                exp_factor = exp_factors[i][j]
+                se_factor = se_factors[i][j]
+                activation = "relu" if i == 0 else "swish"
+                stage.add_sublayer(
+                    "unit{}".format(j + 1),
+                    MixUnit(
+                        in_channels=in_channels,
+                        out_channels=out_channels,
+                        stride=stride,
+                        exp_kernel_count=exp_kernel_count,
+                        conv1_kernel_count=conv1_kernel_count,
+                        conv2_kernel_count=conv2_kernel_count,
+                        exp_factor=exp_factor,
+                        se_factor=se_factor,
+                        activation=activation))
+                in_channels = out_channels
+            self.features.add_sublayer("stage{}".format(i + 1), stage)
+        self.features.add_sublayer(
+            "final_block",
+            ConvBlock(
+                in_channels=in_channels,
+                out_channels=final_block_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                groups=1,
+                bias=False,
+                use_bn=True,
+                bn_eps=1e-5,
+                activation=nn.ReLU()))
+        in_channels = final_block_channels
+        self.features.add_sublayer(
+            "final_pool", nn.AvgPool2D(
+                kernel_size=7, stride=1))
+
+        self.output = nn.Linear(
+            in_features=in_channels, out_features=class_num)
+
+    def forward(self, x):
+        x = self.features(x)
+        # Flatten all non-batch dimensions before the classifier head.
+        reshape_dim = reduce(lambda a, b: a * b, x.shape[1:])
+        x = x.reshape(shape=[x.shape[0], reshape_dim])
+        x = self.output(x)
+        return x
+
+
+def get_mixnet(version, width_scale, model_name=None, **kwargs):
+    """
+    Create MixNet model with specific parameters.
+
+    Parameters:
+    ----------
+    version : str
+        Version of MixNet ('s' or 'm').
+    width_scale : float
+        Scale factor for width of layers.
+    model_name : str or None, default None
+        Model name.
+    """
+
+    if version == "s":
+        init_block_channels = 16
+        channels = [[24, 24], [40, 40, 40, 40], [80, 80, 80],
+                    [120, 120, 120, 200, 200, 200]]
+        exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 1, 1],
+                             [2, 2, 2, 1, 1, 1]]
+        conv1_kernel_counts = [[1, 1], [3, 2, 2, 2], [3, 2, 2],
+                               [3, 4, 4, 5, 4, 4]]
+        conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [2, 2, 2],
+                               [2, 2, 2, 1, 2, 2]]
+        exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6], [6, 3, 3, 6, 6, 6]]
+        se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4], [2, 2, 2, 2, 2, 2]]
+    elif version == "m":
+        init_block_channels = 24
+        channels = [[32, 32], [40, 40, 40, 40], [80, 80, 80, 80],
+                    [120, 120, 120, 120, 200, 200, 200, 200]]
+        exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
+                             [1, 2, 2, 2, 1, 1, 1, 1]]
+        conv1_kernel_counts = [[3, 1], [4, 2, 2, 2], [3, 4, 4, 4],
+                               [1, 4, 4, 4, 4, 4, 4, 4]]
+        conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
+                               [1, 2, 2, 2, 1, 2, 2, 2]]
+        exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6, 6],
+                       [6, 3, 3, 3, 6, 6, 6, 6]]
+        se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4, 4],
+                      [2, 2, 2, 2, 2, 2, 2, 2]]
+    else:
+        raise ValueError("Unsupported MixNet version {}".format(version))
+
+    final_block_channels = 1536
+
+    if width_scale != 1.0:
+        channels = [[round_channels(cij * width_scale) for cij in ci]
+                    for ci in channels]
+        init_block_channels = round_channels(init_block_channels * width_scale)
+
+    net = MixNet(
+        channels=channels,
+        init_block_channels=init_block_channels,
+        final_block_channels=final_block_channels,
+        exp_kernel_counts=exp_kernel_counts,
+        conv1_kernel_counts=conv1_kernel_counts,
+        conv2_kernel_counts=conv2_kernel_counts,
+        exp_factors=exp_factors,
+        se_factors=se_factors,
+        **kwargs)
+
+    return net
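+
+# Note (added, hedged): when width_scale != 1.0, the per-unit widths above are
+# re-quantized by `round_channels`, which in the reference implementation
+# rounds to a multiple of 8. Assuming that behavior, MixNet-L (version "m",
+# width_scale=1.3) gets init_block_channels = round_channels(24 * 1.3) = 32.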
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+    """
+    model = get_mixnet(
+        version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
+    return model
+
+
+def MixNet_M(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+    """
+    model = get_mixnet(
+        version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
+    return model
+
+
+def MixNet_L(pretrained=False, use_ssld=False, **kwargs):
+    """
+    MixNet-L model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
+    https://arxiv.org/abs/1907.09595.
+    """
+    model = get_mixnet(
+        version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
+    return model
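+
+
+# Minimal usage sketch (added; assumes a working paddle install and that this
+# module's imports resolve):
+#
+#   import paddle
+#   model = MixNet_S(pretrained=False, class_num=1000)
+#   y = model(paddle.randn([1, 3, 224, 224]))  # y.shape == [1, 1000]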

+ 287 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/mobilenet_v2.py

@@ -0,0 +1,287 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "MobileNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
+    "MobileNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
+    "MobileNetV2_x0_75":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
+    "MobileNetV2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
+    "MobileNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
+    "MobileNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 name=None,
+                 use_cudnn=True):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")
+
+    def forward(self, inputs, if_act=True):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        if if_act:
+            y = F.relu6(y)
+        return y
+
+
+class InvertedResidualUnit(nn.Layer):
+    def __init__(self, num_channels, num_in_filter, num_filters, stride,
+                 filter_size, padding, expansion_factor, name):
+        super(InvertedResidualUnit, self).__init__()
+        num_expfilter = int(round(num_in_filter * expansion_factor))
+        self._expand_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_expfilter,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_expand")
+
+        self._bottleneck_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_expfilter,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            num_groups=num_expfilter,
+            use_cudnn=False,
+            name=name + "_dwise")
+
+        self._linear_conv = ConvBNLayer(
+            num_channels=num_expfilter,
+            num_filters=num_filters,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            name=name + "_linear")
+
+    def forward(self, inputs, ifshortcut):
+        y = self._expand_conv(inputs, if_act=True)
+        y = self._bottleneck_conv(y, if_act=True)
+        y = self._linear_conv(y, if_act=False)
+        if ifshortcut:
+            y = paddle.add(inputs, y)
+        return y
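+
+# Note (added comment): expand (1x1, relu6) -> depthwise conv (relu6) ->
+# linear 1x1 projection with no activation; InvresiBlocks below passes
+# ifshortcut=True only for the repeated stride-1 units of a stage, where the
+# residual add is shape-safe.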
+
+
+class InvresiBlocks(nn.Layer):
+    def __init__(self, in_c, t, c, n, s, name):
+        super(InvresiBlocks, self).__init__()
+
+        self._first_block = InvertedResidualUnit(
+            num_channels=in_c,
+            num_in_filter=in_c,
+            num_filters=c,
+            stride=s,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t,
+            name=name + "_1")
+
+        self._block_list = []
+        for i in range(1, n):
+            block = self.add_sublayer(
+                name + "_" + str(i + 1),
+                sublayer=InvertedResidualUnit(
+                    num_channels=c,
+                    num_in_filter=c,
+                    num_filters=c,
+                    stride=1,
+                    filter_size=3,
+                    padding=1,
+                    expansion_factor=t,
+                    name=name + "_" + str(i + 1)))
+            self._block_list.append(block)
+
+    def forward(self, inputs):
+        y = self._first_block(inputs, ifshortcut=False)
+        for block in self._block_list:
+            y = block(y, ifshortcut=True)
+        return y
+
+
+class MobileNet(nn.Layer):
+    def __init__(self, class_num=1000, scale=1.0, prefix_name=""):
+        super(MobileNet, self).__init__()
+        self.scale = scale
+        self.class_num = class_num
+
+        bottleneck_params_list = [
+            (1, 16, 1, 1),
+            (6, 24, 2, 2),
+            (6, 32, 3, 2),
+            (6, 64, 4, 2),
+            (6, 96, 3, 1),
+            (6, 160, 3, 2),
+            (6, 320, 1, 1),
+        ]
+
+        self.conv1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=int(32 * scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            name=prefix_name + "conv1_1")
+
+        self.block_list = []
+        i = 1
+        in_c = int(32 * scale)
+        for layer_setting in bottleneck_params_list:
+            t, c, n, s = layer_setting
+            i += 1
+            block = self.add_sublayer(
+                prefix_name + "conv" + str(i),
+                sublayer=InvresiBlocks(
+                    in_c=in_c,
+                    t=t,
+                    c=int(c * scale),
+                    n=n,
+                    s=s,
+                    name=prefix_name + "conv" + str(i)))
+            self.block_list.append(block)
+            in_c = int(c * scale)
+
+        self.out_c = int(1280 * scale) if scale > 1.0 else 1280
+        self.conv9 = ConvBNLayer(
+            num_channels=in_c,
+            num_filters=self.out_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            name=prefix_name + "conv9")
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.out = Linear(
+            self.out_c,
+            class_num,
+            weight_attr=ParamAttr(name=prefix_name + "fc10_weights"),
+            bias_attr=ParamAttr(name=prefix_name + "fc10_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1(inputs, if_act=True)
+        for block in self.block_list:
+            y = block(y)
+        y = self.conv9(y, if_act=True)
+        y = self.pool2d_avg(y)
+        y = paddle.flatten(y, start_axis=1, stop_axis=-1)
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=0.25, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
+    return model
+
+
+def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=0.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
+    return model
+
+
+def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=0.75, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
+    return model
+
+
+def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=1.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
+    return model
+
+
+def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=1.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
+    return model
+
+
+def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
+    model = MobileNet(scale=2.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
+    return model
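+
+
+# Hedged usage sketch (added): each factory only changes `scale`, which
+# multiplies the widths in bottleneck_params_list, so e.g. MobileNetV2_x0_5
+# halves every channel count:
+#
+#   import paddle
+#   model = MobileNetV2_x0_5(pretrained=False)
+#   y = model(paddle.randn([1, 3, 224, 224]))  # y.shape == [1, 1000]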

+ 492 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/pvt_v2.py

@@ -0,0 +1,492 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was heavily based on https://github.com/whai362/PVT
+
+from functools import partial
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.initializer import TruncatedNormal, Constant
+
+from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity, drop_path
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "PVT_V2_B0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B0_pretrained.pdparams",
+    "PVT_V2_B1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B1_pretrained.pdparams",
+    "PVT_V2_B2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_pretrained.pdparams",
+    "PVT_V2_B2_Linear":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_Linear_pretrained.pdparams",
+    "PVT_V2_B3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B3_pretrained.pdparams",
+    "PVT_V2_B4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams",
+    "PVT_V2_B5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+@paddle.jit.not_to_static
+def swapdim(x, dim1, dim2):
+    a = list(range(len(x.shape)))
+    a[dim1], a[dim2] = a[dim2], a[dim1]
+    return x.transpose(a)
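+
+# Example (added comment): for x of shape [B, N, C], swapdim(x, 1, 2) builds
+# the permutation [0, 2, 1] and returns a tensor of shape [B, C, N].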
+
+
+class Mlp(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.,
+                 linear=False):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.dwconv = DWConv(hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+        self.linear = linear
+        if self.linear:
+            self.relu = nn.ReLU()
+
+    def forward(self, x, H, W):
+        x = self.fc1(x)
+        if self.linear:
+            x = self.relu(x)
+        x = self.dwconv(x, H, W)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Layer):
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.,
+                 sr_ratio=1,
+                 linear=False):
+        super().__init__()
+        assert dim % num_heads == 0
+
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+        self.linear = linear
+        self.sr_ratio = sr_ratio
+        if not linear:
+            if sr_ratio > 1:
+                self.sr = nn.Conv2D(
+                    dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+                self.norm = nn.LayerNorm(dim)
+        else:
+            self.pool = nn.AdaptiveAvgPool2D(7)
+            self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1)
+            self.norm = nn.LayerNorm(dim)
+            self.act = nn.GELU()
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        q = self.q(x).reshape(
+            [B, N, self.num_heads, C // self.num_heads]).transpose(
+                [0, 2, 1, 3])
+
+        if not self.linear:
+            if self.sr_ratio > 1:
+                x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
+                x_ = self.sr(x_)
+                h_, w_ = x_.shape[-2:]
+                x_ = x_.reshape([B, C, h_ * w_]).transpose([0, 2, 1])
+                x_ = self.norm(x_)
+                kv = self.kv(x_)
+                kv = kv.reshape([
+                    B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
+                    C // self.num_heads
+                ]).transpose([2, 0, 3, 1, 4])
+            else:
+                kv = self.kv(x)
+                kv = kv.reshape([
+                    B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
+                    C // self.num_heads
+                ]).transpose([2, 0, 3, 1, 4])
+        else:
+            x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
+            x_ = self.sr(self.pool(x_))
+            x_ = x_.reshape([B, C, x_.shape[2] * x_.shape[3]]).transpose(
+                [0, 2, 1])
+            x_ = self.norm(x_)
+            x_ = self.act(x_)
+            kv = self.kv(x_)
+            kv = kv.reshape([
+                B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
+                C // self.num_heads
+            ]).transpose([2, 0, 3, 1, 4])
+        k, v = kv[0], kv[1]
+
+        attn = (q @ swapdim(k, -2, -1)) * self.scale
+        attn = F.softmax(attn, axis=-1)
+        attn = self.attn_drop(attn)
+
+        x = swapdim(attn @ v, 1, 2).reshape([B, N, C])
+        x = self.proj(x)
+        x = self.proj_drop(x)
+
+        return x
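+
+# Note (added comment): the kv branches above implement PVTv2's
+# spatial-reduction attention. Keys and values come from a spatially reduced
+# copy of x -- a strided conv when sr_ratio > 1, or a fixed 7x7 adaptive pool
+# in the `linear` variant -- so attention cost falls from O(N^2) toward
+# O(N^2 / sr_ratio^2), while queries keep full resolution.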
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm,
+                 sr_ratio=1,
+                 linear=False):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+            sr_ratio=sr_ratio,
+            linear=linear)
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop,
+                       linear=linear)
+
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
+
+        return x
+
+
+class OverlapPatchEmbed(nn.Layer):
+    """ Image to Patch Embedding
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=7,
+                 stride=4,
+                 in_chans=3,
+                 embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.H, self.W = img_size[0] // patch_size[0], img_size[
+            1] // patch_size[1]
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2D(
+            in_chans,
+            embed_dim,
+            kernel_size=patch_size,
+            stride=stride,
+            padding=(patch_size[0] // 2, patch_size[1] // 2))
+        self.norm = nn.LayerNorm(embed_dim)
+
+    def forward(self, x):
+        x = self.proj(x)
+        _, _, H, W = x.shape
+        x = x.flatten(2)
+        x = swapdim(x, 1, 2)
+        x = self.norm(x)
+
+        return x, H, W
+
+
+class PyramidVisionTransformerV2(nn.Layer):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8],
+                 mlp_ratios=[4, 4, 4, 4],
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3],
+                 sr_ratios=[8, 4, 2, 1],
+                 num_stages=4,
+                 linear=False):
+        super().__init__()
+        self.class_num = class_num
+        self.depths = depths
+        self.num_stages = num_stages
+
+        # stochastic depth decay rule
+        dpr = [x for x in paddle.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+
+        for i in range(num_stages):
+            patch_embed = OverlapPatchEmbed(
+                img_size=img_size if i == 0 else img_size // (2**(i + 1)),
+                patch_size=7 if i == 0 else 3,
+                stride=4 if i == 0 else 2,
+                in_chans=in_chans if i == 0 else embed_dims[i - 1],
+                embed_dim=embed_dims[i])
+
+            block = nn.LayerList([
+                Block(
+                    dim=embed_dims[i],
+                    num_heads=num_heads[i],
+                    mlp_ratio=mlp_ratios[i],
+                    qkv_bias=qkv_bias,
+                    qk_scale=qk_scale,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[cur + j],
+                    norm_layer=norm_layer,
+                    sr_ratio=sr_ratios[i],
+                    linear=linear) for j in range(depths[i])
+            ])
+            norm = norm_layer(embed_dims[i])
+            cur += depths[i]
+
+            setattr(self, f"patch_embed{i + 1}", patch_embed)
+            setattr(self, f"block{i + 1}", block)
+            setattr(self, f"norm{i + 1}", norm)
+
+        # classification head
+        self.head = nn.Linear(embed_dims[3],
+                              class_num) if class_num > 0 else Identity()
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        for i in range(self.num_stages):
+            patch_embed = getattr(self, f"patch_embed{i + 1}")
+            block = getattr(self, f"block{i + 1}")
+            norm = getattr(self, f"norm{i + 1}")
+            x, H, W = patch_embed(x)
+            for blk in block:
+                x = blk(x, H, W)
+            x = norm(x)
+            if i != self.num_stages - 1:
+                x = x.reshape([B, H, W, x.shape[2]]).transpose([0, 3, 1, 2])
+
+        return x.mean(axis=1)
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+
+        return x
+
+
+class DWConv(nn.Layer):
+    def __init__(self, dim=768):
+        super().__init__()
+        self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim)
+
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        x = swapdim(x, 1, 2)
+        x = x.reshape([B, C, H, W])
+        x = self.dwconv(x)
+        x = x.flatten(2)
+        x = swapdim(x, 1, 2)
+
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def PVT_V2_B0(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[32, 64, 160, 256],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 2, 2],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B0"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B1(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 2, 2],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B1"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B2(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 4, 6, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B2"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B3(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 4, 18, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B3"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B4(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 8, 27, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B4"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B5(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[4, 4, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 6, 40, 3],
+        sr_ratios=[8, 4, 2, 1],
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B5"], use_ssld=use_ssld)
+    return model
+
+
+def PVT_V2_B2_Linear(pretrained=False, use_ssld=False, **kwargs):
+    model = PyramidVisionTransformerV2(
+        patch_size=4,
+        embed_dims=[64, 128, 320, 512],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        norm_layer=partial(
+            nn.LayerNorm, epsilon=1e-6),
+        depths=[3, 4, 6, 3],
+        sr_ratios=[8, 4, 2, 1],
+        linear=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["PVT_V2_B2_Linear"], use_ssld=use_ssld)
+    return model
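+
+
+# Hedged usage sketch (added; assumes vision_transformer.py from this model
+# zoo is importable for DropPath/Identity/to_2tuple):
+#
+#   import paddle
+#   model = PVT_V2_B0(pretrained=False)
+#   y = model(paddle.randn([1, 3, 224, 224]))  # y.shape == [1, 1000]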

+ 203 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/rednet.py

@@ -0,0 +1,203 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/d-li14/involution
+
+import paddle
+import paddle.nn as nn
+
+from paddle.vision.models import resnet
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "RedNet26":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
+    "RedNet38":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet38_pretrained.pdparams",
+    "RedNet50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet50_pretrained.pdparams",
+    "RedNet101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet101_pretrained.pdparams",
+    "RedNet152":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class Involution(nn.Layer):
+    def __init__(self, channels, kernel_size, stride):
+        super(Involution, self).__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.channels = channels
+        reduction_ratio = 4
+        self.group_channels = 16
+        self.groups = self.channels // self.group_channels
+        self.conv1 = nn.Sequential(
+            ('conv', nn.Conv2D(
+                in_channels=channels,
+                out_channels=channels // reduction_ratio,
+                kernel_size=1,
+                bias_attr=False)),
+            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
+            ('activate', nn.ReLU()))
+        self.conv2 = nn.Sequential(('conv', nn.Conv2D(
+            in_channels=channels // reduction_ratio,
+            out_channels=kernel_size**2 * self.groups,
+            kernel_size=1,
+            stride=1)))
+        if stride > 1:
+            self.avgpool = nn.AvgPool2D(stride, stride)
+
+    def forward(self, x):
+        weight = self.conv2(
+            self.conv1(x if self.stride == 1 else self.avgpool(x)))
+        b, c, h, w = weight.shape
+        weight = weight.reshape(
+            (b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
+
+        out = nn.functional.unfold(x, self.kernel_size, self.stride,
+                                   (self.kernel_size - 1) // 2, 1)
+        out = out.reshape(
+            (b, self.groups, self.group_channels, self.kernel_size**2, h, w))
+        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
+        return out
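+
+# Note (added comment): this is the involution operator. A kernel of shape
+# [groups, kernel_size**2] is predicted per output pixel from the (optionally
+# avg-pooled) input itself and applied to the unfolded neighborhoods; each
+# kernel is shared across the 16 channels of its group -- the inverse of a
+# convolution's spatial weight sharing.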
+
+
+class BottleneckBlock(resnet.BottleneckBlock):
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 groups=1,
+                 base_width=64,
+                 dilation=1,
+                 norm_layer=None):
+        super(BottleneckBlock, self).__init__(inplanes, planes, stride,
+                                              downsample, groups, base_width,
+                                              dilation, norm_layer)
+        width = int(planes * (base_width / 64.)) * groups
+        self.conv2 = Involution(width, 7, stride)
+
+
+class RedNet(resnet.ResNet):
+    def __init__(self, block, depth, class_num=1000, with_pool=True):
+        # depth=50 only satisfies the parent ResNet constructor; the stem and
+        # stages are rebuilt below from layer_cfg.
+        super(RedNet, self).__init__(
+            block=block, depth=50, num_classes=class_num, with_pool=with_pool)
+        layer_cfg = {
+            26: [1, 2, 4, 1],
+            38: [2, 3, 5, 2],
+            50: [3, 4, 6, 3],
+            101: [3, 4, 23, 3],
+            152: [3, 8, 36, 3]
+        }
+        layers = layer_cfg[depth]
+
+        self.conv1 = None
+        self.bn1 = None
+        self.relu = None
+        self.inplanes = 64
+        self.class_num = class_num
+        self.stem = nn.Sequential(
+            nn.Sequential(
+                ('conv', nn.Conv2D(
+                    in_channels=3,
+                    out_channels=self.inplanes // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
+                    bias_attr=False)),
+                ('bn', nn.BatchNorm2D(self.inplanes // 2)),
+                ('activate', nn.ReLU())),
+            Involution(self.inplanes // 2, 3, 1),
+            nn.BatchNorm2D(self.inplanes // 2),
+            nn.ReLU(),
+            nn.Sequential(
+                ('conv', nn.Conv2D(
+                    in_channels=self.inplanes // 2,
+                    out_channels=self.inplanes,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)),
+                ('activate', nn.ReLU())))
+
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+
+    def forward(self, x):
+        x = self.stem(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        if self.with_pool:
+            x = self.avgpool(x)
+
+        if self.class_num > 0:
+            x = paddle.flatten(x, 1)
+            x = self.fc(x)
+
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def RedNet26(pretrained=False, **kwargs):
+    model = RedNet(BottleneckBlock, 26, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
+    return model
+
+
+def RedNet38(pretrained=False, **kwargs):
+    model = RedNet(BottleneckBlock, 38, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
+    return model
+
+
+def RedNet50(pretrained=False, **kwargs):
+    model = RedNet(BottleneckBlock, 50, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
+    return model
+
+
+def RedNet101(pretrained=False, **kwargs):
+    model = RedNet(BottleneckBlock, 101, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
+    return model
+
+
+def RedNet152(pretrained=False, **kwargs):
+    model = RedNet(BottleneckBlock, 152, **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
+    return model
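+
+
+# Hedged usage sketch (added):
+#
+#   import paddle
+#   model = RedNet26(pretrained=False, class_num=1000)
+#   y = model(paddle.randn([1, 3, 224, 224]))  # y.shape == [1, 1000]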

+ 431 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/regnet.py

@@ -0,0 +1,431 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/facebookresearch/pycls
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "RegNetX_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
+    "RegNetX_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
+    "RegNetX_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
+    "RegNetY_200MF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams",
+    "RegNetY_4GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
+    "RegNetY_32GF":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def quantize_float(f, q):
+    """Converts a float to closest non-zero int divisible by q."""
+    return int(round(f / q) * q)
+
+
+def adjust_ws_gs_comp(ws, bms, gs):
+    """Adjusts the compatibility of widths and groups."""
+    ws_bot = [int(w * b) for w, b in zip(ws, bms)]
+    gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
+    ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
+    ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
+    return ws, gs
+
+
+def get_stages_from_blocks(ws, rs):
+    """Gets ws/ds of network at each stage from per block values."""
+    ts = [
+        w != wp or r != rp
+        for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
+    ]
+    s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
+    s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
+    return s_ws, s_ds
+
+
+def generate_regnet(w_a, w_0, w_m, d, q=8):
+    """Generates per block ws from RegNet parameters."""
+    assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
+    ws_cont = np.arange(d) * w_a + w_0
+    ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
+    ws = w_0 * np.power(w_m, ks)
+    ws = np.round(np.divide(ws, q)) * q
+    num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
+    ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
+    return ws, num_stages, max_stage, ws_cont
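+
+# Worked example (added comment; numbers use RegNetX_200MF's parameters
+# below: w_a=36.44, w_0=24, w_m=2.49, d=13, q=8): the linear ramp starts at
+# ws_cont = [24.0, 60.44, 96.88, ...]; each value is snapped to the nearest
+# point of the geometric grid w_0 * w_m**k and then quantized to a multiple
+# of q, giving the discrete per-block widths that get_stages_from_blocks
+# groups into equal-width stages.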
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 padding=0,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"),
+            bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"))
+        bn_name = name + "_bn"
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + ".output.1.w_0"),
+            bias_attr=ParamAttr(bn_name + ".output.1.b_0"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 bm,
+                 gw,
+                 se_on,
+                 se_r,
+                 shortcut=True,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        # Compute the bottleneck width
+        w_b = int(round(num_filters * bm))
+        # Compute the number of groups
+        num_gs = w_b // gw
+        self.se_on = se_on
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=w_b,
+            filter_size=1,
+            padding=0,
+            act="relu",
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=w_b,
+            num_filters=w_b,
+            filter_size=3,
+            stride=stride,
+            padding=1,
+            groups=num_gs,
+            act="relu",
+            name=name + "_branch2b")
+        if se_on:
+            w_se = int(round(num_channels * se_r))
+            self.se_block = SELayer(
+                num_channels=w_b,
+                num_filters=w_b,
+                reduction_ratio=w_se,
+                name=name + "_branch2se")
+        self.conv2 = ConvBNLayer(
+            num_channels=w_b,
+            num_filters=num_filters,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        if self.se_on:
+            conv1 = self.se_block(conv1)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class SELayer(nn.Layer):
+    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
+        super(SELayer, self).__init__()
+
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+
+        self._num_channels = num_channels
+
+        med_ch = int(num_channels / reduction_ratio)
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + "_sqz_offset"))
+
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_filters,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + "_exc_offset"))
+
+    def forward(self, input):
+        pool = self.pool2d_gap(input)
+        pool = paddle.reshape(pool, shape=[-1, self._num_channels])
+        squeeze = self.squeeze(pool)
+        squeeze = F.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = F.sigmoid(excitation)
+        excitation = paddle.reshape(
+            excitation, shape=[-1, self._num_channels, 1, 1])
+        out = input * excitation
+        return out
+
+
+class RegNet(nn.Layer):
+    def __init__(self,
+                 w_a,
+                 w_0,
+                 w_m,
+                 d,
+                 group_w,
+                 bot_mul,
+                 q=8,
+                 se_on=False,
+                 class_num=1000):
+        super(RegNet, self).__init__()
+
+        # Generate RegNet ws per block
+        b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q)
+        # Convert to per stage format
+        ws, ds = get_stages_from_blocks(b_ws, b_ws)
+        # Generate group widths and bot muls
+        gws = [group_w for _ in range(num_s)]
+        bms = [bot_mul for _ in range(num_s)]
+        # Adjust the compatibility of ws and gws
+        ws, gws = adjust_ws_gs_comp(ws, bms, gws)
+        # Use the same stride for each stage
+        ss = [2 for _ in range(num_s)]
+        # Use SE for RegNetY
+        se_r = 0.25
+        # Construct the model
+        # Group params by stage
+        stage_params = list(zip(ds, ws, ss, bms, gws))
+        # Construct the stem
+        stem_type = "simple_stem_in"
+        stem_w = 32
+        block_type = "res_bottleneck_block"
+
+        self.conv = ConvBNLayer(
+            num_channels=3,
+            num_filters=stem_w,
+            filter_size=3,
+            stride=2,
+            padding=1,
+            act="relu",
+            name="stem_conv")
+
+        self.block_list = []
+        for block, (d, w_out, stride, bm, gw) in enumerate(stage_params):
+            shortcut = False
+            for i in range(d):
+                num_channels = stem_w if block == i == 0 else in_channels
+                # The stride applies only to the first block of each stage
+                b_stride = stride if i == 0 else 1
+                conv_name = "s" + str(block + 1) + "_b" + str(i +
+                                                              1)  # chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    conv_name,
+                    BottleneckBlock(
+                        num_channels=num_channels,
+                        num_filters=w_out,
+                        stride=b_stride,
+                        bm=bm,
+                        gw=gw,
+                        se_on=se_on,
+                        se_r=se_r,
+                        shortcut=shortcut,
+                        name=conv_name))
+                in_channels = w_out
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = w_out
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
+            bias_attr=ParamAttr(name="fc_0.b_0"))
+
+    def forward(self, inputs):
+        y = self.conv(inputs)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=38.65,
+        w_0=96,
+        w_m=2.43,
+        d=23,
+        group_w=40,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=69.86,
+        w_0=320,
+        w_m=2.0,
+        d=23,
+        group_w=168,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=31.41,
+        w_0=96,
+        w_m=2.24,
+        d=22,
+        group_w=64,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=115.89,
+        w_0=232,
+        w_m=2.53,
+        d=20,
+        group_w=232,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    return model
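+
+
+# Hedged usage sketch (added): the X variants use plain bottleneck blocks,
+# the Y variants the same blocks with squeeze-and-excitation enabled
+# (se_on=True, se_r=0.25):
+#
+#   import paddle
+#   model = RegNetY_200MF(pretrained=False)
+#   y = model(paddle.randn([1, 3, 224, 224]))  # y.shape == [1, 1000]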

+ 422 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/repvgg.py

@@ -0,0 +1,422 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/DingXiaoH/RepVGG
+
+import paddle.nn as nn
+import paddle
+import numpy as np
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "RepVGG_A0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
+    "RepVGG_A1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
+    "RepVGG_A2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
+    "RepVGG_B0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
+    "RepVGG_B1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
+    "RepVGG_B2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
+    "RepVGG_B3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
+    "RepVGG_B1g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
+    "RepVGG_B1g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
+    "RepVGG_B2g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams",
+    "RepVGG_B2g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
+    "RepVGG_B3g2":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams",
+    "RepVGG_B3g4":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
+g2_map = {l: 2 for l in optional_groupwise_layers}
+g4_map = {l: 4 for l in optional_groupwise_layers}
+
+
+class ConvBN(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1):
+        super(ConvBN, self).__init__()
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            bias_attr=False)
+        self.bn = nn.BatchNorm2D(num_features=out_channels)
+
+    def forward(self, x):
+        y = self.conv(x)
+        y = self.bn(y)
+        return y
+
+
+class RepVGGBlock(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 padding_mode='zeros'):
+        super(RepVGGBlock, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.groups = groups
+        self.padding_mode = padding_mode
+
+        assert kernel_size == 3
+        assert padding == 1
+
+        padding_11 = padding - kernel_size // 2
+
+        self.nonlinearity = nn.ReLU()
+
+        self.rbr_identity = nn.BatchNorm2D(
+            num_features=in_channels
+        ) if out_channels == in_channels and stride == 1 else None
+        self.rbr_dense = ConvBN(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups)
+        self.rbr_1x1 = ConvBN(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=stride,
+            padding=padding_11,
+            groups=groups)
+
+    def forward(self, inputs):
+        if not self.training:
+            return self.nonlinearity(self.rbr_reparam(inputs))
+
+        if self.rbr_identity is None:
+            id_out = 0
+        else:
+            id_out = self.rbr_identity(inputs)
+        return self.nonlinearity(
+            self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
+
+    def eval(self):
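+        # overrides nn.Layer.eval: on first call, build the single fused 3x3
+        # conv (rbr_reparam) that replaces the three branches at inference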
+        if not hasattr(self, 'rbr_reparam'):
+            self.rbr_reparam = nn.Conv2D(
+                in_channels=self.in_channels,
+                out_channels=self.out_channels,
+                kernel_size=self.kernel_size,
+                stride=self.stride,
+                padding=self.padding,
+                dilation=self.dilation,
+                groups=self.groups,
+                padding_mode=self.padding_mode)
+        self.training = False
+        kernel, bias = self.get_equivalent_kernel_bias()
+        self.rbr_reparam.weight.set_value(kernel)
+        self.rbr_reparam.bias.set_value(bias)
+        for layer in self.sublayers():
+            layer.eval()
+
+    def get_equivalent_kernel_bias(self):
+        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
+        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
+        kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
+        return kernel3x3 + self._pad_1x1_to_3x3_tensor(
+            kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
+
+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+        if kernel1x1 is None:
+            return 0
+        else:
+            return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
+
+    def _fuse_bn_tensor(self, branch):
+        if branch is None:
+            return 0, 0
+        if isinstance(branch, ConvBN):
+            kernel = branch.conv.weight
+            running_mean = branch.bn._mean
+            running_var = branch.bn._variance
+            gamma = branch.bn.weight
+            beta = branch.bn.bias
+            eps = branch.bn._epsilon
+        else:
+            assert isinstance(branch, nn.BatchNorm2D)
+            if not hasattr(self, 'id_tensor'):
+                input_dim = self.in_channels // self.groups
+                kernel_value = np.zeros(
+                    (self.in_channels, input_dim, 3, 3), dtype=np.float32)
+                for i in range(self.in_channels):
+                    kernel_value[i, i % input_dim, 1, 1] = 1
+                self.id_tensor = paddle.to_tensor(kernel_value)
+            kernel = self.id_tensor
+            running_mean = branch._mean
+            running_var = branch._variance
+            gamma = branch.weight
+            beta = branch.bias
+            eps = branch._epsilon
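+        # fold BN into the conv: y = gamma * (W*x - mean) / std + beta
+        #                          = (W * gamma/std) * x + (beta - mean*gamma/std)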
+        std = (running_var + eps).sqrt()
+        t = (gamma / std).reshape((-1, 1, 1, 1))
+        return kernel * t, beta - running_mean * gamma / std
+
+
+class RepVGG(nn.Layer):
+    def __init__(self,
+                 num_blocks,
+                 width_multiplier=None,
+                 override_groups_map=None,
+                 class_num=1000):
+        super(RepVGG, self).__init__()
+
+        assert len(width_multiplier) == 4
+        self.override_groups_map = override_groups_map or dict()
+
+        assert 0 not in self.override_groups_map
+
+        self.in_planes = min(64, int(64 * width_multiplier[0]))
+
+        self.stage0 = RepVGGBlock(
+            in_channels=3,
+            out_channels=self.in_planes,
+            kernel_size=3,
+            stride=2,
+            padding=1)
+        self.cur_layer_idx = 1
+        self.stage1 = self._make_stage(
+            int(64 * width_multiplier[0]), num_blocks[0], stride=2)
+        self.stage2 = self._make_stage(
+            int(128 * width_multiplier[1]), num_blocks[1], stride=2)
+        self.stage3 = self._make_stage(
+            int(256 * width_multiplier[2]), num_blocks[2], stride=2)
+        self.stage4 = self._make_stage(
+            int(512 * width_multiplier[3]), num_blocks[3], stride=2)
+        self.gap = nn.AdaptiveAvgPool2D(output_size=1)
+        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)
+
+    def _make_stage(self, planes, num_blocks, stride):
+        strides = [stride] + [1] * (num_blocks - 1)
+        blocks = []
+        for stride in strides:
+            cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
+            blocks.append(
+                RepVGGBlock(
+                    in_channels=self.in_planes,
+                    out_channels=planes,
+                    kernel_size=3,
+                    stride=stride,
+                    padding=1,
+                    groups=cur_groups))
+            self.in_planes = planes
+            self.cur_layer_idx += 1
+        return nn.Sequential(*blocks)
+
+    def eval(self):
+        self.training = False
+        for layer in self.sublayers():
+            layer.training = False
+            layer.eval()
+
+    def forward(self, x):
+        out = self.stage0(x)
+        out = self.stage1(out)
+        out = self.stage2(out)
+        out = self.stage3(out)
+        out = self.stage4(out)
+        out = self.gap(out)
+        out = paddle.flatten(out, start_axis=1)
+        out = self.linear(out)
+        return out
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `bool` or a "
+            "`str` path to pretrained weights.")
+
+
+def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[2, 4, 14, 1],
+        width_multiplier=[0.75, 0.75, 0.75, 2.5],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[2, 4, 14, 1],
+        width_multiplier=[1, 1, 1, 2.5],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[2, 4, 14, 1],
+        width_multiplier=[1.5, 1.5, 1.5, 2.75],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[1, 1, 1, 2.5],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2, 2, 2, 4],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2, 2, 2, 4],
+        override_groups_map=g2_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2, 2, 2, 4],
+        override_groups_map=g4_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2.5, 2.5, 2.5, 5],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B2g2(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2.5, 2.5, 2.5, 5],
+        override_groups_map=g2_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[2.5, 2.5, 2.5, 5],
+        override_groups_map=g4_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[3, 3, 3, 5],
+        override_groups_map=None,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B3g2(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[3, 3, 3, 5],
+        override_groups_map=g2_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld)
+    return model
+
+
+def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
+    model = RepVGG(
+        num_blocks=[4, 6, 16, 1],
+        width_multiplier=[3, 3, 3, 5],
+        override_groups_map=g4_map,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
+    return model

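The key idea in repvgg.py is structural re-parameterization: during training each RepVGGBlock runs a 3x3 conv, a 1x1 conv, and (when shapes allow) an identity BatchNorm in parallel, and the custom eval() folds all three into the single rbr_reparam 3x3 conv via get_equivalent_kernel_bias. A minimal sketch of the equivalence check follows; it is not part of the diff, and the import path is an assumption (as vendored, the module lives under paddlers.models.ppcls.arch.backbone.model_zoo):

    import paddle
    from repvgg import RepVGGBlock  # illustrative import path

    block = RepVGGBlock(in_channels=8, out_channels=8, kernel_size=3, padding=1)

    # use running BN statistics on both paths, but keep the multi-branch forward
    for layer in block.sublayers():
        layer.training = False
    block.training = True

    x = paddle.randn([2, 8, 16, 16])
    with paddle.no_grad():
        y_multi = block(x)

    block.eval()  # builds rbr_reparam and folds the three branches into it
    with paddle.no_grad():
        y_fused = block(x)

    print(float(paddle.abs(y_multi - y_fused).max()))  # should print ~0 (float error)
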
+ 264 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/res2net.py

@@ -0,0 +1,264 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+# NOTE: import from the vendored copy, since this file lives under paddlers/models/ppcls
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "Res2Net50_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
+    "Res2Net50_14w_8s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(
+            self,
+            num_channels,
+            num_filters,
+            filter_size,
+            stride=1,
+            groups=1,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels1,
+                 num_channels2,
+                 num_filters,
+                 stride,
+                 scales,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        self.stride = stride
+        self.scales = scales
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels1,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1_list = []
+        for s in range(scales - 1):
+            conv1 = self.add_sublayer(
+                name + '_branch2b_' + str(s + 1),
+                ConvBNLayer(
+                    num_channels=num_filters // scales,
+                    num_filters=num_filters // scales,
+                    filter_size=3,
+                    stride=stride,
+                    act='relu',
+                    name=name + '_branch2b_' + str(s + 1)))
+            self.conv1_list.append(conv1)
+        self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
+
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_channels2,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels1,
+                num_filters=num_channels2,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
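+        # hierarchical split: slice the 1x1 output into `scales` groups; each
+        # 3x3 conv also sees the previous group's output (except at stride 2)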
+        xs = paddle.split(y, self.scales, 1)
+        ys = []
+        for s, conv1 in enumerate(self.conv1_list):
+            if s == 0 or self.stride == 2:
+                ys.append(conv1(xs[s]))
+            else:
+                ys.append(conv1(paddle.add(xs[s], ys[-1])))
+        if self.stride == 1:
+            ys.append(xs[-1])
+        else:
+            ys.append(self.pool2d_avg(xs[-1]))
+        conv1 = paddle.concat(ys, axis=1)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class Res2Net(nn.Layer):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
+        super(Res2Net, self).__init__()
+
+        self.layers = layers
+        self.scales = scales
+        self.width = width
+        basic_width = self.width * self.scales
+        supported_layers = [50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        elif layers == 200:
+            depth = [3, 12, 48, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_channels2 = [256, 512, 1024, 2048]
+        num_filters = [basic_width * t for t in [1, 2, 4, 8]]
+
+        self.conv1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name="conv1")
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                if layers in [101, 152] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels1=num_channels[block]
+                        if i == 0 else num_channels2[block],
+                        num_channels2=num_channels2[block],
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        scales=scales,
+                        shortcut=shortcut,
+                        if_first=block == i == 0,
+                        name=conv_name))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1(inputs)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `bool` or a "
+            "`str` path to pretrained weights.")
+
+
+def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs):
+    model = Res2Net(layers=50, scales=4, width=26, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
+    return model
+
+
+def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs):
+    model = Res2Net(layers=50, scales=8, width=14, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
+    return model

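In the names above, Res2Net50_26w_4s reads as: 50 layers, base width 26 channels per scale group, scales=4. Inside each bottleneck the 1x1 output is split into `scales` slices, and every 3x3 conv (except the first, or at stride 2) also receives the previous slice's output, so later slices accumulate progressively larger receptive fields. A self-contained sketch of that hierarchical split, mirroring BottleneckBlock.forward with illustrative shapes:

    import paddle
    import paddle.nn as nn

    scales, width = 4, 26
    y = paddle.randn([1, width * scales, 56, 56])  # stands in for the conv0 output

    xs = paddle.split(y, scales, axis=1)
    convs = nn.LayerList(
        [nn.Conv2D(width, width, 3, padding=1) for _ in range(scales - 1)])

    ys = []
    for s, conv in enumerate(convs):
        # each slice also sees the previous slice's output (stride-1 case)
        ys.append(conv(xs[s] if s == 0 else xs[s] + ys[-1]))
    ys.append(xs[-1])  # the last slice passes through untouched
    out = paddle.concat(ys, axis=1)
    print(out.shape)  # [1, 104, 56, 56]
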
+ 305 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/res2net_vd.py

@@ -0,0 +1,305 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+# NOTE: import from the vendored copy, since this file lives under paddlers/models/ppcls
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "Res2Net50_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
+    "Res2Net101_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
+    "Res2Net200_vd_26w_4s":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(
+            self,
+            num_channels,
+            num_filters,
+            filter_size,
+            stride=1,
+            groups=1,
+            is_vd_mode=False,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
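+        # vd trick: average-pool first so a downsampling shortcut aggregates
+        # all activations, rather than a stride-2 1x1 conv discarding 3/4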
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels1,
+                 num_channels2,
+                 num_filters,
+                 stride,
+                 scales,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        self.stride = stride
+        self.scales = scales
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels1,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1_list = []
+        for s in range(scales - 1):
+            conv1 = self.add_sublayer(
+                name + '_branch2b_' + str(s + 1),
+                ConvBNLayer(
+                    num_channels=num_filters // scales,
+                    num_filters=num_filters // scales,
+                    filter_size=3,
+                    stride=stride,
+                    act='relu',
+                    name=name + '_branch2b_' + str(s + 1)))
+            self.conv1_list.append(conv1)
+        self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
+
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_channels2,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels1,
+                num_filters=num_channels2,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        xs = paddle.split(y, self.scales, 1)
+        ys = []
+        for s, conv1 in enumerate(self.conv1_list):
+            if s == 0 or self.stride == 2:
+                ys.append(conv1(xs[s]))
+            else:
+                ys.append(conv1(xs[s] + ys[-1]))
+        if self.stride == 1:
+            ys.append(xs[-1])
+        else:
+            ys.append(self.pool2d_avg(xs[-1]))
+        conv1 = paddle.concat(ys, axis=1)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class Res2Net_vd(nn.Layer):
+    def __init__(self, layers=50, scales=4, width=26, class_num=1000):
+        super(Res2Net_vd, self).__init__()
+
+        self.layers = layers
+        self.scales = scales
+        self.width = width
+        basic_width = self.width * self.scales
+        supported_layers = [50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        elif layers == 200:
+            depth = [3, 12, 48, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_channels2 = [256, 512, 1024, 2048]
+        num_filters = [basic_width * t for t in [1, 2, 4, 8]]
+
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                if layers in [101, 152, 200] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels1=num_channels[block]
+                        if i == 0 else num_channels2[block],
+                        num_channels2=num_channels2[block],
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        scales=scales,
+                        shortcut=shortcut,
+                        if_first=block == i == 0,
+                        name=conv_name))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `bool` or a "
+            "`str` path to pretrained weights.")
+
+
+def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
+    model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net50_vd_26w_4s"],
+        use_ssld=use_ssld)
+    return model
+
+
+def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
+    model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net101_vd_26w_4s"],
+        use_ssld=use_ssld)
+    return model
+
+
+def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
+    model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["Res2Net200_vd_26w_4s"],
+        use_ssld=use_ssld)
+    return model

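Relative to plain res2net.py, the _vd variant changes two things: the 7x7 stem becomes three 3x3 convs (conv1_1 through conv1_3), and downsampling shortcuts average-pool before a stride-1 1x1 projection (is_vd_mode) rather than using a stride-2 1x1 conv, which would discard three quarters of the activations. A small sketch of the shortcut difference (channel counts are illustrative):

    import paddle
    import paddle.nn as nn

    x = paddle.randn([1, 64, 56, 56])

    # plain shortcut: a stride-2 1x1 conv samples every other pixel
    plain = nn.Conv2D(64, 256, kernel_size=1, stride=2)

    # "vd" shortcut: pool 2x2 first, then a stride-1 1x1 projection
    vd = nn.Sequential(
        nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True),
        nn.Conv2D(64, 256, kernel_size=1, stride=1))

    print(plain(x).shape, vd(x).shape)  # both [1, 256, 28, 28]
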
+ 740 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/resnest.py

@@ -0,0 +1,740 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/zhanghang1989/ResNeSt
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+import math
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import KaimingNormal
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.regularizer import L2Decay
+
+# NOTE: import from the vendored copy, since this file lives under paddlers/models/ppcls
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNeSt50_fast_1s1x64d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
+    "ResNeSt50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
+    "ResNeSt101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        bn_decay = 0.0
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            dilation=dilation,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weight"),
+            bias_attr=False)
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(
+                name=name + "_scale", regularizer=L2Decay(bn_decay)),
+            bias_attr=ParamAttr(
+                name + "_offset", regularizer=L2Decay(bn_decay)),
+            moving_mean_name=name + "_mean",
+            moving_variance_name=name + "_variance")
+
+    def forward(self, x):
+        x = self._conv(x)
+        x = self._batch_norm(x)
+        return x
+
+
+class rSoftmax(nn.Layer):
+    def __init__(self, radix, cardinality):
+        super(rSoftmax, self).__init__()
+        self.radix = radix
+        self.cardinality = cardinality
+
+    def forward(self, x):
+        cardinality = self.cardinality
+        radix = self.radix
+
+        batch, r, h, w = x.shape
+        if self.radix > 1:
+            x = paddle.reshape(
+                x=x,
+                shape=[
+                    batch, cardinality, radix,
+                    int(r * h * w / cardinality / radix)
+                ])
+            x = paddle.transpose(x=x, perm=[0, 2, 1, 3])
+            x = nn.functional.softmax(x, axis=1)
+            x = paddle.reshape(x=x, shape=[batch, r * h * w, 1, 1])
+        else:
+            x = nn.functional.sigmoid(x)
+        return x
+
+
+class SplatConv(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 radix=2,
+                 reduction_factor=4,
+                 rectify_avg=False,
+                 name=None):
+        super(SplatConv, self).__init__()
+
+        self.radix = radix
+
+        self.conv1 = ConvBNLayer(
+            num_channels=in_channels,
+            num_filters=channels * radix,
+            filter_size=kernel_size,
+            stride=stride,
+            groups=groups * radix,
+            act="relu",
+            name=name + "_1_weights")
+
+        self.avg_pool2d = AdaptiveAvgPool2D(1)
+
+        inter_channels = int(max(in_channels * radix // reduction_factor, 32))
+
+        # to calc gap
+        self.conv2 = ConvBNLayer(
+            num_channels=channels,
+            num_filters=inter_channels,
+            filter_size=1,
+            stride=1,
+            groups=groups,
+            act="relu",
+            name=name + "_2_weights")
+
+        # to calc atten
+        self.conv3 = Conv2D(
+            in_channels=inter_channels,
+            out_channels=channels * radix,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=groups,
+            weight_attr=ParamAttr(
+                name=name + "_weights", initializer=KaimingNormal()),
+            bias_attr=False)
+
+        self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)
+
+    def forward(self, x):
+        x = self.conv1(x)
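+        # split-attention: sum the radix splits, squeeze with GAP and two 1x1
+        # convs, then re-weight each split by its rSoftmax attention map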
+
+        if self.radix > 1:
+            splited = paddle.split(x, num_or_sections=self.radix, axis=1)
+            gap = paddle.add_n(splited)
+        else:
+            gap = x
+
+        gap = self.avg_pool2d(gap)
+        gap = self.conv2(gap)
+
+        atten = self.conv3(gap)
+        atten = self.rsoftmax(atten)
+
+        if self.radix > 1:
+            attens = paddle.split(atten, num_or_sections=self.radix, axis=1)
+            y = paddle.add_n([
+                paddle.multiply(split, att)
+                for (att, split) in zip(attens, splited)
+            ])
+        else:
+            y = paddle.multiply(x, atten)
+
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 radix=1,
+                 cardinality=1,
+                 bottleneck_width=64,
+                 avd=False,
+                 avd_first=False,
+                 dilation=1,
+                 is_first=False,
+                 rectify_avg=False,
+                 last_gamma=False,
+                 avg_down=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        self.inplanes = inplanes
+        self.planes = planes
+        self.stride = stride
+        self.radix = radix
+        self.cardinality = cardinality
+        self.avd = avd
+        self.avd_first = avd_first
+        self.dilation = dilation
+        self.is_first = is_first
+        self.rectify_avg = rectify_avg
+        self.last_gamma = last_gamma
+        self.avg_down = avg_down
+
+        group_width = int(planes * (bottleneck_width / 64.)) * cardinality
+
+        self.conv1 = ConvBNLayer(
+            num_channels=self.inplanes,
+            num_filters=group_width,
+            filter_size=1,
+            stride=1,
+            groups=1,
+            act="relu",
+            name=name + "_conv1")
+
+        if avd and avd_first and (stride > 1 or is_first):
+            self.avg_pool2d_1 = AvgPool2D(
+                kernel_size=3, stride=stride, padding=1)
+
+        if radix >= 1:
+            self.conv2 = SplatConv(
+                in_channels=group_width,
+                channels=group_width,
+                kernel_size=3,
+                stride=1,
+                padding=dilation,
+                dilation=dilation,
+                groups=cardinality,
+                bias=False,
+                radix=radix,
+                rectify_avg=rectify_avg,
+                name=name + "_splat")
+        else:
+            self.conv2 = ConvBNLayer(
+                num_channels=group_width,
+                num_filters=group_width,
+                filter_size=3,
+                stride=1,
+                dilation=dilation,
+                groups=cardinality,
+                act="relu",
+                name=name + "_conv2")
+
+        if avd and not avd_first and (stride > 1 or is_first):
+            self.avg_pool2d_2 = AvgPool2D(
+                kernel_size=3, stride=stride, padding=1)
+
+        self.conv3 = ConvBNLayer(
+            num_channels=group_width,
+            num_filters=planes * 4,
+            filter_size=1,
+            stride=1,
+            groups=1,
+            act=None,
+            name=name + "_conv3")
+
+        if stride != 1 or self.inplanes != self.planes * 4:
+            if avg_down:
+                if dilation == 1:
+                    self.avg_pool2d_3 = AvgPool2D(
+                        kernel_size=stride, stride=stride, padding=0)
+                else:
+                    self.avg_pool2d_3 = AvgPool2D(
+                        kernel_size=1, stride=1, padding=0, ceil_mode=True)
+
+                self.conv4 = Conv2D(
+                    in_channels=self.inplanes,
+                    out_channels=planes * 4,
+                    kernel_size=1,
+                    stride=1,
+                    padding=0,
+                    groups=1,
+                    weight_attr=ParamAttr(
+                        name=name + "_weights", initializer=KaimingNormal()),
+                    bias_attr=False)
+            else:
+                self.conv4 = Conv2D(
+                    in_channels=self.inplanes,
+                    out_channels=planes * 4,
+                    kernel_size=1,
+                    stride=stride,
+                    padding=0,
+                    groups=1,
+                    weight_attr=ParamAttr(
+                        name=name + "_shortcut_weights",
+                        initializer=KaimingNormal()),
+                    bias_attr=False)
+
+            bn_decay = 0.0
+            self._batch_norm = BatchNorm(
+                planes * 4,
+                act=None,
+                param_attr=ParamAttr(
+                    name=name + "_shortcut_scale",
+                    regularizer=L2Decay(bn_decay)),
+                bias_attr=ParamAttr(
+                    name + "_shortcut_offset", regularizer=L2Decay(bn_decay)),
+                moving_mean_name=name + "_shortcut_mean",
+                moving_variance_name=name + "_shortcut_variance")
+
+    def forward(self, x):
+        short = x
+
+        x = self.conv1(x)
+        if self.avd and self.avd_first and (self.stride > 1 or self.is_first):
+            x = self.avg_pool2d_1(x)
+
+        x = self.conv2(x)
+
+        if self.avd and not self.avd_first and (self.stride > 1 or
+                                                self.is_first):
+            x = self.avg_pool2d_2(x)
+
+        x = self.conv3(x)
+
+        if self.stride != 1 or self.inplanes != self.planes * 4:
+            if self.avg_down:
+                short = self.avg_pool2d_3(short)
+
+            short = self.conv4(short)
+
+            short = self._batch_norm(short)
+
+        y = paddle.add(x=short, y=x)
+        y = F.relu(y)
+        return y
+
+
+class ResNeStLayer(nn.Layer):
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 blocks,
+                 radix,
+                 cardinality,
+                 bottleneck_width,
+                 avg_down,
+                 avd,
+                 avd_first,
+                 rectify_avg,
+                 last_gamma,
+                 stride=1,
+                 dilation=1,
+                 is_first=True,
+                 name=None):
+        super(ResNeStLayer, self).__init__()
+        self.inplanes = inplanes
+        self.planes = planes
+        self.blocks = blocks
+        self.radix = radix
+        self.cardinality = cardinality
+        self.bottleneck_width = bottleneck_width
+        self.avg_down = avg_down
+        self.avd = avd
+        self.avd_first = avd_first
+        self.rectify_avg = rectify_avg
+        self.last_gamma = last_gamma
+        self.is_first = is_first
+
+        if dilation == 1 or dilation == 2:
+            bottleneck_func = self.add_sublayer(
+                name + "_bottleneck_0",
+                BottleneckBlock(
+                    inplanes=self.inplanes,
+                    planes=planes,
+                    stride=stride,
+                    radix=radix,
+                    cardinality=cardinality,
+                    bottleneck_width=bottleneck_width,
+                    avg_down=self.avg_down,
+                    avd=avd,
+                    avd_first=avd_first,
+                    dilation=1,
+                    is_first=is_first,
+                    rectify_avg=rectify_avg,
+                    last_gamma=last_gamma,
+                    name=name + "_bottleneck_0"))
+        elif dilation == 4:
+            bottleneck_func = self.add_sublayer(
+                name + "_bottleneck_0",
+                BottleneckBlock(
+                    inplanes=self.inplanes,
+                    planes=planes,
+                    stride=stride,
+                    radix=radix,
+                    cardinality=cardinality,
+                    bottleneck_width=bottleneck_width,
+                    avg_down=self.avg_down,
+                    avd=avd,
+                    avd_first=avd_first,
+                    dilation=2,
+                    is_first=is_first,
+                    rectify_avg=rectify_avg,
+                    last_gamma=last_gamma,
+                    name=name + "_bottleneck_0"))
+        else:
+            raise RuntimeError("unknown dilation size: {}".format(dilation))
+
+        self.inplanes = planes * 4
+        self.bottleneck_block_list = [bottleneck_func]
+        for i in range(1, blocks):
+            curr_name = name + "_bottleneck_" + str(i)
+
+            bottleneck_func = self.add_sublayer(
+                curr_name,
+                BottleneckBlock(
+                    inplanes=self.inplanes,
+                    planes=planes,
+                    radix=radix,
+                    cardinality=cardinality,
+                    bottleneck_width=bottleneck_width,
+                    avg_down=self.avg_down,
+                    avd=avd,
+                    avd_first=avd_first,
+                    dilation=dilation,
+                    rectify_avg=rectify_avg,
+                    last_gamma=last_gamma,
+                    name=curr_name))
+            self.bottleneck_block_list.append(bottleneck_func)
+
+    def forward(self, x):
+        for bottleneck_block in self.bottleneck_block_list:
+            x = bottleneck_block(x)
+        return x
+
+
+class ResNeSt(nn.Layer):
+    def __init__(self,
+                 layers,
+                 radix=1,
+                 groups=1,
+                 bottleneck_width=64,
+                 dilated=False,
+                 dilation=1,
+                 deep_stem=False,
+                 stem_width=64,
+                 avg_down=False,
+                 rectify_avg=False,
+                 avd=False,
+                 avd_first=False,
+                 final_drop=0.0,
+                 last_gamma=False,
+                 class_num=1000):
+        super(ResNeSt, self).__init__()
+
+        self.cardinality = groups
+        self.bottleneck_width = bottleneck_width
+        # ResNet-D params
+        self.inplanes = stem_width * 2 if deep_stem else 64
+        self.avg_down = avg_down
+        self.last_gamma = last_gamma
+        # ResNeSt params
+        self.radix = radix
+        self.avd = avd
+        self.avd_first = avd_first
+
+        self.deep_stem = deep_stem
+        self.stem_width = stem_width
+        self.layers = layers
+        self.final_drop = final_drop
+        self.dilated = dilated
+        self.dilation = dilation
+
+        self.rectify_avg = rectify_avg
+
+        if self.deep_stem:
+            self.stem = nn.Sequential(
+                ("conv1", ConvBNLayer(
+                    num_channels=3,
+                    num_filters=stem_width,
+                    filter_size=3,
+                    stride=2,
+                    act="relu",
+                    name="conv1")),
+                ("conv2", ConvBNLayer(
+                    num_channels=stem_width,
+                    num_filters=stem_width,
+                    filter_size=3,
+                    stride=1,
+                    act="relu",
+                    name="conv2")),
+                ("conv3", ConvBNLayer(
+                    num_channels=stem_width,
+                    num_filters=stem_width * 2,
+                    filter_size=3,
+                    stride=1,
+                    act="relu",
+                    name="conv3")))
+        else:
+            self.stem = ConvBNLayer(
+                num_channels=3,
+                num_filters=stem_width,
+                filter_size=7,
+                stride=2,
+                act="relu",
+                name="conv1")
+
+        self.max_pool2d = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.layer1 = ResNeStLayer(
+            inplanes=self.stem_width * 2
+            if self.deep_stem else self.stem_width,
+            planes=64,
+            blocks=self.layers[0],
+            radix=radix,
+            cardinality=self.cardinality,
+            bottleneck_width=bottleneck_width,
+            avg_down=self.avg_down,
+            avd=avd,
+            avd_first=avd_first,
+            rectify_avg=rectify_avg,
+            last_gamma=last_gamma,
+            stride=1,
+            dilation=1,
+            is_first=False,
+            name="layer1")
+
+        self.layer2 = ResNeStLayer(
+            inplanes=256,
+            planes=128,
+            blocks=self.layers[1],
+            radix=radix,
+            cardinality=self.cardinality,
+            bottleneck_width=bottleneck_width,
+            avg_down=self.avg_down,
+            avd=avd,
+            avd_first=avd_first,
+            rectify_avg=rectify_avg,
+            last_gamma=last_gamma,
+            stride=2,
+            name="layer2")
+
+        if self.dilated or self.dilation == 4:
+            self.layer3 = ResNeStLayer(
+                inplanes=512,
+                planes=256,
+                blocks=self.layers[2],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=1,
+                dilation=2,
+                name="layer3")
+            self.layer4 = ResNeStLayer(
+                inplanes=1024,
+                planes=512,
+                blocks=self.layers[3],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=1,
+                dilation=4,
+                name="layer4")
+        elif self.dilation == 2:
+            self.layer3 = ResNeStLayer(
+                inplanes=512,
+                planes=256,
+                blocks=self.layers[2],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=2,
+                dilation=1,
+                name="layer3")
+            self.layer4 = ResNeStLayer(
+                inplanes=1024,
+                planes=512,
+                blocks=self.layers[3],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=1,
+                dilation=2,
+                name="layer4")
+        else:
+            self.layer3 = ResNeStLayer(
+                inplanes=512,
+                planes=256,
+                blocks=self.layers[2],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=2,
+                name="layer3")
+            self.layer4 = ResNeStLayer(
+                inplanes=1024,
+                planes=512,
+                blocks=self.layers[3],
+                radix=radix,
+                cardinality=self.cardinality,
+                bottleneck_width=bottleneck_width,
+                avg_down=self.avg_down,
+                avd=avd,
+                avd_first=avd_first,
+                rectify_avg=rectify_avg,
+                last_gamma=last_gamma,
+                stride=2,
+                name="layer4")
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.out_channels = 2048
+
+        stdv = 1.0 / math.sqrt(self.out_channels * 1.0)
+
+        self.out = Linear(
+            self.out_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=nn.initializer.Uniform(-stdv, stdv),
+                name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, x):
+        x = self.stem(x)
+        x = self.max_pool2d(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+
+        x = self.layer3(x)
+
+        x = self.layer4(x)
+        x = self.pool2d_avg(x)
+        x = paddle.reshape(x, shape=[-1, self.out_channels])
+        x = self.out(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `bool` or a "
+            "`str` path to pretrained weights.")
+
+
+def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeSt(
+        layers=[3, 4, 6, 3],
+        radix=1,
+        groups=1,
+        bottleneck_width=64,
+        deep_stem=True,
+        stem_width=32,
+        avg_down=True,
+        avd=True,
+        avd_first=True,
+        final_drop=0.0,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeSt50_fast_1s1x64d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeSt(
+        layers=[3, 4, 6, 3],
+        radix=2,
+        groups=1,
+        bottleneck_width=64,
+        deep_stem=True,
+        stem_width=32,
+        avg_down=True,
+        avd=True,
+        avd_first=False,
+        final_drop=0.0,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeSt(
+        layers=[3, 4, 23, 3],
+        radix=2,
+        groups=1,
+        bottleneck_width=64,
+        deep_stem=True,
+        stem_width=64,
+        avg_down=True,
+        avd=True,
+        avd_first=False,
+        final_drop=0.0,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
+    return model

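The heart of resnest.py is SplatConv: the grouped 3x3 conv emits `radix` splits per cardinal group, their sum is squeezed through global average pooling and two 1x1 convs, and rSoftmax normalizes the attention logits across the radix axis before the splits are re-weighted and summed. A numeric sketch of that weighting for radix=2, cardinality=1 (all names and shapes illustrative):

    import paddle
    import paddle.nn.functional as F

    radix, channels = 2, 8
    x = paddle.randn([1, channels * radix, 14, 14])    # stands in for conv1 output
    atten = paddle.randn([1, channels * radix, 1, 1])  # stands in for conv3 output

    # rSoftmax with cardinality=1: softmax over the radix axis, per channel
    a = atten.reshape([1, 1, radix, channels])
    a = F.softmax(a, axis=2)
    a = a.reshape([1, channels * radix, 1, 1])

    splits = paddle.split(x, radix, axis=1)
    weights = paddle.split(a, radix, axis=1)
    y = paddle.add_n([w * s for w, s in zip(weights, splits)])
    print(y.shape)  # [1, 8, 14, 14]
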
+ 309 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/resnet_vc.py

@@ -0,0 +1,309 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNet50_vc":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+        self._num_channels_out = num_filters * 4
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class BasicBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=conv1)
+        y = F.relu(y)
+        return y
+
+
+class ResNet_vc(nn.Layer):
+    def __init__(self, layers=50, class_num=1000):
+        super(ResNet_vc, self).__init__()
+
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {}, but got {}".format(
+                supported_layers, layers)
+
+        if layers == 18:
+            depth = [2, 2, 2, 2]
+        elif layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512,
+                        1024] if layers >= 50 else [64, 64, 128, 256]
+        num_filters = [64, 128, 256, 512]
+
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        if layers >= 50:
+            for block in range(len(depth)):
+                shortcut = False
+                for i in range(depth[block]):
+                    if layers in [101, 152] and block == 2:
+                        if i == 0:
+                            conv_name = "res" + str(block + 2) + "a"
+                        else:
+                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                    else:
+                        conv_name = "res" + str(block + 2) + chr(97 + i)
+                    bottleneck_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BottleneckBlock(
+                            num_channels=num_channels[block]
+                            if i == 0 else num_filters[block] * 4,
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            name=conv_name))
+                    self.block_list.append(bottleneck_block)
+                    shortcut = True
+        else:
+            for block in range(len(depth)):
+                shortcut = False
+                for i in range(depth[block]):
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                    basic_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BasicBlock(
+                            num_channels=num_channels[block]
+                            if i == 0 else num_filters[block],
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            name=conv_name))
+                    self.block_list.append(basic_block)
+                    shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
+            bias_attr=ParamAttr(name="fc_0.b_0"))
+
+    def forward(self, inputs):
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `str` "
+            "(checkpoint path) or a `bool`.")
+
+
+def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNet_vc(layers=50, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
+    return model

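The `_vc` variant's only structural change versus plain ResNet is the stem: the classic single 7x7/stride-2 convolution is replaced by the three 3x3 convolutions `conv1_1`..`conv1_3` above. Both stems produce the same output geometry, which a short sketch can confirm (assumes only `paddle`; the layer names in the comments refer to this file):

```python
# Sketch: the "vc" stem (three 3x3 convs) keeps the same output geometry
# as the 7x7 stem it replaces. Assumes paddle only.
import paddle
import paddle.nn as nn

x = paddle.randn([1, 3, 224, 224])
stem_7x7 = nn.Conv2D(3, 64, 7, stride=2, padding=3, bias_attr=False)
stem_vc = nn.Sequential(
    nn.Conv2D(3, 32, 3, stride=2, padding=1, bias_attr=False),   # conv1_1
    nn.Conv2D(32, 32, 3, stride=1, padding=1, bias_attr=False),  # conv1_2
    nn.Conv2D(32, 64, 3, stride=1, padding=1, bias_attr=False))  # conv1_3
print(stem_7x7(x).shape)  # [1, 64, 112, 112]
print(stem_vc(x).shape)   # [1, 64, 112, 112]
```

The deeper stem adds two extra BN+ReLU nonlinearities at the same resolution; that is the point of the trick, not a saving in parameters or FLOPs.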
+ 298 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/resnext.py

@@ -0,0 +1,298 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
+    "ResNeXt50_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
+    "ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
+    "ResNeXt101_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
+    "ResNeXt152_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
+    "ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None,
+                 data_format="NCHW"):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False,
+            data_format=data_format)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance',
+            data_layout=data_format)
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 cardinality,
+                 shortcut=True,
+                 name=None,
+                 data_format="NCHW"):
+        super(BottleneckBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a",
+            data_format=data_format)
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            groups=cardinality,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b",
+            data_format=data_format)
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c",
+            data_format=data_format)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 2
+                if cardinality == 32 else num_filters,
+                filter_size=1,
+                stride=stride,
+                name=name + "_branch1",
+                data_format=data_format)
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class ResNeXt(nn.Layer):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
+        super(ResNeXt, self).__init__()
+
+        self.layers = layers
+        self.data_format = data_format
+        self.input_image_channel = input_image_channel
+        self.cardinality = cardinality
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {}, but got {}".format(
+                supported_layers, layers)
+        supported_cardinality = [32, 64]
+        assert cardinality in supported_cardinality, \
+            "supported cardinality values are {}, but got {}" \
+            .format(supported_cardinality, cardinality)
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_filters = [128, 256, 512,
+                       1024] if cardinality == 32 else [256, 512, 1024, 2048]
+
+        self.conv = ConvBNLayer(
+            num_channels=self.input_image_channel,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name="res_conv1",
+            data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+
+        self.block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                if layers in [101, 152] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block] if i == 0 else
+                        num_filters[block] * int(64 // self.cardinality),
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        cardinality=self.cardinality,
+                        shortcut=shortcut,
+                        name=conv_name,
+                        data_format=self.data_format))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        with paddle.static.amp.fp16_guard():
+            if self.data_format == "NHWC":
+                inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
+                inputs.stop_gradient = True
+            y = self.conv(inputs)
+            y = self.pool2d_max(y)
+            for block in self.block_list:
+                y = block(y)
+            y = self.pool2d_avg(y)
+            y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+            y = self.out(y)
+            return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `str` "
+            "(checkpoint path) or a `bool`.")
+
+
+def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=101, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=101, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
+    return model

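In the `BottleneckBlock` above, `cardinality` is realized purely through the `groups` argument of the 3x3 convolution: there is no explicit branching, just a grouped kernel. A sketch of what that does to the weight tensor (assumes only `paddle`; the channel sizes are illustrative):

```python
# Sketch: "cardinality" in the ResNeXt blocks above is just a grouped conv.
import paddle.nn as nn

dense   = nn.Conv2D(128, 128, 3, padding=1, bias_attr=False)
grouped = nn.Conv2D(128, 128, 3, padding=1, groups=32, bias_attr=False)

print(dense.weight.shape)    # [128, 128, 3, 3]
print(grouped.weight.shape)  # [128, 4, 3, 3]
```

Each of the 32 parallel paths transforms 128/32 = 4 channels, cutting the 3x3 weights by the cardinality factor; this is why the block can afford wider `num_filters` at the same cost.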
+ 490 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/resnext101_wsl.py

@@ -0,0 +1,490 @@
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNeXt101_32x8d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x16d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16_wsl_pretrained.pdparams",
+    "ResNeXt101_32x32d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
+    "ResNeXt101_32x48d_wsl":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        if "downsample" in name:
+            conv_name = name + ".0"
+        else:
+            conv_name = name
+        self._conv = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=conv_name + ".weight"),
+            bias_attr=False)
+        if "downsample" in name:
+            bn_name = name[:9] + "downsample.1"
+        else:
+            if "conv1" == name:
+                bn_name = "bn" + name[-1]
+            else:
+                bn_name = (name[:10] if name[7:9].isdigit() else name[:9]
+                           ) + "bn" + name[-1]
+        self._bn = BatchNorm(
+            num_channels=output_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + ".weight"),
+            bias_attr=ParamAttr(name=bn_name + ".bias"),
+            moving_mean_name=bn_name + ".running_mean",
+            moving_variance_name=bn_name + ".running_var")
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = self._bn(x)
+        return x
+
+
+class ShortCut(nn.Layer):
+    def __init__(self, input_channels, output_channels, stride, name=None):
+        super(ShortCut, self).__init__()
+
+        self.input_channels = input_channels
+        self.output_channels = output_channels
+        self.stride = stride
+        if input_channels != output_channels or stride != 1:
+            self._conv = ConvBNLayer(
+                input_channels,
+                output_channels,
+                filter_size=1,
+                stride=stride,
+                name=name)
+
+    def forward(self, inputs):
+        if self.input_channels != self.output_channels or self.stride != 1:
+            return self._conv(inputs)
+        return inputs
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self, input_channels, output_channels, stride, cardinality,
+                 width, name):
+        super(BottleneckBlock, self).__init__()
+
+        self._conv0 = ConvBNLayer(
+            input_channels,
+            output_channels,
+            filter_size=1,
+            act="relu",
+            name=name + ".conv1")
+        self._conv1 = ConvBNLayer(
+            output_channels,
+            output_channels,
+            filter_size=3,
+            act="relu",
+            stride=stride,
+            groups=cardinality,
+            name=name + ".conv2")
+        self._conv2 = ConvBNLayer(
+            output_channels,
+            output_channels // (width // 8),
+            filter_size=1,
+            act=None,
+            name=name + ".conv3")
+        self._short = ShortCut(
+            input_channels,
+            output_channels // (width // 8),
+            stride=stride,
+            name=name + ".downsample")
+
+    def forward(self, inputs):
+        x = self._conv0(inputs)
+        x = self._conv1(x)
+        x = self._conv2(x)
+        y = self._short(inputs)
+        y = paddle.add(x, y)
+        y = F.relu(y)
+        return y
+
+
+class ResNeXt101WSL(nn.Layer):
+    def __init__(self, layers=101, cardinality=32, width=48, class_num=1000):
+        super(ResNeXt101WSL, self).__init__()
+
+        self.class_num = class_num
+
+        self.layers = layers
+        self.cardinality = cardinality
+        self.width = width
+        self.scale = width // 8
+
+        self.depth = [3, 4, 23, 3]
+        self.base_width = cardinality * width
+        num_filters = [self.base_width * i
+                       for i in [1, 2, 4, 8]]  # e.g. [256, 512, 1024, 2048] for 32x8d
+        self._conv_stem = ConvBNLayer(
+            3, 64, 7, stride=2, act="relu", name="conv1")
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self._conv1_0 = BottleneckBlock(
+            64,
+            num_filters[0],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer1.0")
+        self._conv1_1 = BottleneckBlock(
+            num_filters[0] // (width // 8),
+            num_filters[0],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer1.1")
+        self._conv1_2 = BottleneckBlock(
+            num_filters[0] // (width // 8),
+            num_filters[0],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer1.2")
+
+        self._conv2_0 = BottleneckBlock(
+            num_filters[0] // (width // 8),
+            num_filters[1],
+            stride=2,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer2.0")
+        self._conv2_1 = BottleneckBlock(
+            num_filters[1] // (width // 8),
+            num_filters[1],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer2.1")
+        self._conv2_2 = BottleneckBlock(
+            num_filters[1] // (width // 8),
+            num_filters[1],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer2.2")
+        self._conv2_3 = BottleneckBlock(
+            num_filters[1] // (width // 8),
+            num_filters[1],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer2.3")
+
+        self._conv3_0 = BottleneckBlock(
+            num_filters[1] // (width // 8),
+            num_filters[2],
+            stride=2,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.0")
+        self._conv3_1 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.1")
+        self._conv3_2 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.2")
+        self._conv3_3 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.3")
+        self._conv3_4 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.4")
+        self._conv3_5 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.5")
+        self._conv3_6 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.6")
+        self._conv3_7 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.7")
+        self._conv3_8 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.8")
+        self._conv3_9 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.9")
+        self._conv3_10 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.10")
+        self._conv3_11 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.11")
+        self._conv3_12 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.12")
+        self._conv3_13 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.13")
+        self._conv3_14 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.14")
+        self._conv3_15 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.15")
+        self._conv3_16 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.16")
+        self._conv3_17 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.17")
+        self._conv3_18 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.18")
+        self._conv3_19 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.19")
+        self._conv3_20 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.20")
+        self._conv3_21 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.21")
+        self._conv3_22 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[2],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer3.22")
+
+        self._conv4_0 = BottleneckBlock(
+            num_filters[2] // (width // 8),
+            num_filters[3],
+            stride=2,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer4.0")
+        self._conv4_1 = BottleneckBlock(
+            num_filters[3] // (width // 8),
+            num_filters[3],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer4.1")
+        self._conv4_2 = BottleneckBlock(
+            num_filters[3] // (width // 8),
+            num_filters[3],
+            stride=1,
+            cardinality=self.cardinality,
+            width=self.width,
+            name="layer4.2")
+
+        self._avg_pool = AdaptiveAvgPool2D(1)
+        self._out = Linear(
+            num_filters[3] // (width // 8),
+            class_num,
+            weight_attr=ParamAttr(name="fc.weight"),
+            bias_attr=ParamAttr(name="fc.bias"))
+
+    def forward(self, inputs):
+        x = self._conv_stem(inputs)
+        x = self._pool(x)
+
+        x = self._conv1_0(x)
+        x = self._conv1_1(x)
+        x = self._conv1_2(x)
+
+        x = self._conv2_0(x)
+        x = self._conv2_1(x)
+        x = self._conv2_2(x)
+        x = self._conv2_3(x)
+
+        x = self._conv3_0(x)
+        x = self._conv3_1(x)
+        x = self._conv3_2(x)
+        x = self._conv3_3(x)
+        x = self._conv3_4(x)
+        x = self._conv3_5(x)
+        x = self._conv3_6(x)
+        x = self._conv3_7(x)
+        x = self._conv3_8(x)
+        x = self._conv3_9(x)
+        x = self._conv3_10(x)
+        x = self._conv3_11(x)
+        x = self._conv3_12(x)
+        x = self._conv3_13(x)
+        x = self._conv3_14(x)
+        x = self._conv3_15(x)
+        x = self._conv3_16(x)
+        x = self._conv3_17(x)
+        x = self._conv3_18(x)
+        x = self._conv3_19(x)
+        x = self._conv3_20(x)
+        x = self._conv3_21(x)
+        x = self._conv3_22(x)
+
+        x = self._conv4_0(x)
+        x = self._conv4_1(x)
+        x = self._conv4_2(x)
+
+        x = self._avg_pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self._out(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `str` "
+            "(checkpoint path) or a `bool`.")
+
+
+def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt101WSL(cardinality=32, width=8, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x8d_wsl"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_32x16d_wsl(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt101WSL(cardinality=32, width=16, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x16d_wsl"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_32x32d_wsl(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt101WSL(cardinality=32, width=32, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x32d_wsl"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_32x48d_wsl(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt101WSL(cardinality=32, width=48, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_32x48d_wsl"],
+        use_ssld=use_ssld)
+    return model

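The 23 hand-written `_conv3_*` attributes above keep the sublayer names aligned with the released WSL checkpoints (`layer3.0` .. `layer3.22`). If that naming constraint were dropped, the stack could be generated in a loop; a hedged sketch, not part of the diff (`build_layer3` is a hypothetical helper, and `BottleneckBlock` is the class defined in this file):

```python
# Hedged refactor sketch: the 23 explicit layer3 blocks, built in a loop.
# The "layer3.<i>" names are preserved so pretrained WSL parameter names
# would still line up.
import paddle.nn as nn

def build_layer3(bottleneck_cls, num_filters, cardinality, width):
    scale = width // 8
    blocks = []
    for i in range(23):
        in_c = (num_filters[1] if i == 0 else num_filters[2]) // scale
        blocks.append(bottleneck_cls(
            in_c, num_filters[2],
            stride=2 if i == 0 else 1,   # only the first block downsamples
            cardinality=cardinality,
            width=width,
            name="layer3.%d" % i))
    return nn.Sequential(*blocks)
```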
+ 317 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/resnext_vd.py

@@ -0,0 +1,317 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "ResNeXt50_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
+    "ResNeXt101_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
+    "ResNeXt101_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
+    "ResNeXt152_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
+    "ResNeXt152_vd_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(
+            self,
+            num_channels,
+            num_filters,
+            filter_size,
+            stride=1,
+            groups=1,
+            is_vd_mode=False,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 cardinality,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            groups=cardinality,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 2
+                if cardinality == 32 else num_filters,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = paddle.add(x=short, y=conv2)
+        y = F.relu(y)
+        return y
+
+
+class ResNeXt(nn.Layer):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
+        super(ResNeXt, self).__init__()
+
+        self.layers = layers
+        self.cardinality = cardinality
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {}, but got {}".format(
+                supported_layers, layers)
+        supported_cardinality = [32, 64]
+        assert cardinality in supported_cardinality, \
+            "supported cardinality values are {}, but got {}" \
+            .format(supported_cardinality, cardinality)
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_filters = [128, 256, 512,
+                       1024] if cardinality == 32 else [256, 512, 1024, 2048]
+
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                if layers in [101, 152] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block] if i == 0 else
+                        num_filters[block] * int(64 // self.cardinality),
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        cardinality=self.cardinality,
+                        shortcut=shortcut,
+                        if_first=block == i == 0,
+                        name=conv_name))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `str` "
+            "(checkpoint path) or a `bool`.")
+
+
+def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=101, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_32x4d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=101, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt101_vd_64x4d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_32x4d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ResNeXt152_vd_64x4d"],
+        use_ssld=use_ssld)
+    return model

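The `is_vd_mode` branch in `ConvBNLayer` above is the "ResNet-D" trick: the downsampling shortcut applies a 2x2 average pool followed by a stride-1 1x1 conv, instead of a stride-2 1x1 conv that would simply discard three of every four positions. A sketch with illustrative channel sizes (assumes only `paddle`):

```python
# Sketch of the "vd" shortcut: avg-pool + stride-1 1x1 conv versus a
# stride-2 1x1 conv. Same output geometry, but no positions are skipped.
import paddle
import paddle.nn as nn

x = paddle.randn([1, 256, 56, 56])
plain = nn.Conv2D(256, 512, 1, stride=2, bias_attr=False)   # drops 3/4 of pixels
vd = nn.Sequential(
    nn.AvgPool2D(kernel_size=2, stride=2, ceil_mode=True),  # pools every pixel
    nn.Conv2D(256, 512, 1, stride=1, bias_attr=False))
print(plain(x).shape)  # [1, 512, 28, 28]
print(vd(x).shape)     # [1, 512, 28, 28]
```

This is also why the shortcut above is built with `stride=1` and the pooling enabled for every downsampling block except the very first (`if_first`).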
+ 281 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/rexnet.py

@@ -0,0 +1,281 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from math import ceil
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ReXNet_1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
+    "ReXNet_1_3":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
+    "ReXNet_1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams",
+    "ReXNet_2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
+    "ReXNet_3_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def conv_bn_act(out,
+                in_channels,
+                channels,
+                kernel=1,
+                stride=1,
+                pad=0,
+                num_group=1,
+                active=True,
+                relu6=False):
+    out.append(
+        nn.Conv2D(
+            in_channels,
+            channels,
+            kernel,
+            stride,
+            pad,
+            groups=num_group,
+            bias_attr=False))
+    out.append(nn.BatchNorm2D(channels))
+    if active:
+        out.append(nn.ReLU6() if relu6 else nn.ReLU())
+
+
+def conv_bn_swish(out,
+                  in_channels,
+                  channels,
+                  kernel=1,
+                  stride=1,
+                  pad=0,
+                  num_group=1):
+    out.append(
+        nn.Conv2D(
+            in_channels,
+            channels,
+            kernel,
+            stride,
+            pad,
+            groups=num_group,
+            bias_attr=False))
+    out.append(nn.BatchNorm2D(channels))
+    out.append(nn.Swish())
+
+
+class SE(nn.Layer):
+    def __init__(self, in_channels, channels, se_ratio=12):
+        super(SE, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2D(1)
+        self.fc = nn.Sequential(
+            nn.Conv2D(
+                in_channels, channels // se_ratio, kernel_size=1, padding=0),
+            nn.BatchNorm2D(channels // se_ratio),
+            nn.ReLU(),
+            nn.Conv2D(
+                channels // se_ratio, channels, kernel_size=1, padding=0),
+            nn.Sigmoid())
+
+    def forward(self, x):
+        y = self.avg_pool(x)
+        y = self.fc(y)
+        return x * y
+
+
+class LinearBottleneck(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 channels,
+                 t,
+                 stride,
+                 use_se=True,
+                 se_ratio=12,
+                 **kwargs):
+        super(LinearBottleneck, self).__init__(**kwargs)
+        self.use_shortcut = stride == 1 and in_channels <= channels
+        self.in_channels = in_channels
+        self.out_channels = channels
+
+        out = []
+        if t != 1:
+            dw_channels = in_channels * t
+            conv_bn_swish(out, in_channels=in_channels, channels=dw_channels)
+        else:
+            dw_channels = in_channels
+
+        conv_bn_act(
+            out,
+            in_channels=dw_channels,
+            channels=dw_channels,
+            kernel=3,
+            stride=stride,
+            pad=1,
+            num_group=dw_channels,
+            active=False)
+
+        if use_se:
+            out.append(SE(dw_channels, dw_channels, se_ratio))
+
+        out.append(nn.ReLU6())
+        conv_bn_act(
+            out,
+            in_channels=dw_channels,
+            channels=channels,
+            active=False,
+            relu6=True)
+        self.out = nn.Sequential(*out)
+
+    def forward(self, x):
+        out = self.out(x)
+        if self.use_shortcut:
+            out[:, 0:self.in_channels] += x
+
+        return out
+
+
+class ReXNetV1(nn.Layer):
+    def __init__(self,
+                 input_ch=16,
+                 final_ch=180,
+                 width_mult=1.0,
+                 depth_mult=1.0,
+                 class_num=1000,
+                 use_se=True,
+                 se_ratio=12,
+                 dropout_ratio=0.2,
+                 bn_momentum=0.9):
+        super(ReXNetV1, self).__init__()
+
+        layers = [1, 2, 2, 3, 3, 5]
+        strides = [1, 2, 2, 2, 1, 2]
+        use_ses = [False, False, True, True, True, True]
+
+        layers = [ceil(element * depth_mult) for element in layers]
+        strides = sum([[element] + [1] * (layers[idx] - 1)
+                       for idx, element in enumerate(strides)], [])
+        if use_se:
+            use_ses = sum([[element] * layers[idx]
+                           for idx, element in enumerate(use_ses)], [])
+        else:
+            use_ses = [False] * sum(layers[:])
+        ts = [1] * layers[0] + [6] * sum(layers[1:])
+
+        self.depth = sum(layers[:]) * 3
+        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
+        inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
+
+        features = []
+        in_channels_group = []
+        channels_group = []
+
+        # This channel schedule is one simple choice that makes every block an
+        # expansion block: the width grows linearly toward final_ch.
+        for i in range(self.depth // 3):
+            if i == 0:
+                in_channels_group.append(int(round(stem_channel * width_mult)))
+                channels_group.append(int(round(inplanes * width_mult)))
+            else:
+                in_channels_group.append(int(round(inplanes * width_mult)))
+                inplanes += final_ch / (self.depth // 3 * 1.0)
+                channels_group.append(int(round(inplanes * width_mult)))
+
+        conv_bn_swish(
+            features,
+            3,
+            int(round(stem_channel * width_mult)),
+            kernel=3,
+            stride=2,
+            pad=1)
+
+        for block_idx, (in_c, c, t, s, se) in enumerate(
+                zip(in_channels_group, channels_group, ts, strides, use_ses)):
+            features.append(
+                LinearBottleneck(
+                    in_channels=in_c,
+                    channels=c,
+                    t=t,
+                    stride=s,
+                    use_se=se,
+                    se_ratio=se_ratio))
+
+        pen_channels = int(1280 * width_mult)
+        conv_bn_swish(features, c, pen_channels)
+
+        features.append(nn.AdaptiveAvgPool2D(1))
+        self.features = nn.Sequential(*features)
+        self.output = nn.Sequential(
+            nn.Dropout(dropout_ratio),
+            nn.Conv2D(
+                pen_channels, class_num, 1, bias_attr=True))
+
+    def forward(self, x):
+        x = self.features(x)
+        x = self.output(x).squeeze(axis=-1).squeeze(axis=-1)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "Unsupported type for `pretrained`. Please pass a `str` "
+            "(checkpoint path) or a `bool`.")
+
+
+def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs):
+    model = ReXNetV1(width_mult=1.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
+    return model
+
+
+def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs):
+    model = ReXNetV1(width_mult=1.3, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
+    return model
+
+
+def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs):
+    model = ReXNetV1(width_mult=1.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
+    return model
+
+
+def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs):
+    model = ReXNetV1(width_mult=2.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
+    return model
+
+
+def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs):
+    model = ReXNetV1(width_mult=3.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
+    return model

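As a quick smoke test for the ReXNet builders above (a minimal sketch; the vendored module path is an assumption):

import paddle
from paddlers.models.ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0

model = ReXNet_1_0(pretrained=False, class_num=1000)  # random init, ImageNet head
logits = model(paddle.randn([1, 3, 224, 224]))
print(logits.shape)  # [1, 1000] after the trailing squeeze in forward()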
+ 390 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/se_resnet_vd.py

@@ -0,0 +1,390 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "SE_ResNet18_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
+    "SE_ResNet34_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
+    "SE_ResNet50_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(
+            self,
+            num_channels,
+            num_filters,
+            filter_size,
+            stride=1,
+            groups=1,
+            is_vd_mode=False,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
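+        # "vd" trick (ResNet-D): when enabled, downsample with a 2x2 average
+        # pool before the 1x1 shortcut conv instead of striding the conv, so
+        # no activations are simply discarded.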
+        self._pool2d_avg = AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 reduction_ratio=16,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+        self.scale = SELayer(
+            num_channels=num_filters * 4,
+            num_filters=num_filters * 4,
+            reduction_ratio=reduction_ratio,
+            name='fc_' + name)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=False if if_first else True,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+        scale = self.scale(conv2)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=scale)
+        y = F.relu(y)
+        return y
+
+
+class BasicBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 reduction_ratio=16,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+
+        self.scale = SELayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            reduction_ratio=reduction_ratio,
+            name='fc_' + name)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=False if if_first else True,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        scale = self.scale(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=scale)
+        y = F.relu(y)
+        return y
+
+
+class SELayer(nn.Layer):
+    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
+        super(SELayer, self).__init__()
+
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+
+        self._num_channels = num_channels
+
+        med_ch = int(num_channels / reduction_ratio)
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
+
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_filters,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
+
+    def forward(self, input):
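+        # Squeeze-and-Excitation: global average pool to [N, C], compress to
+        # C / reduction_ratio with ReLU, expand back with a sigmoid gate, and
+        # rescale the input feature map channel-wise.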
+        pool = self.pool2d_gap(input)
+        pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = F.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = F.sigmoid(excitation)
+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = input * excitation
+        return out
+
+
+class SE_ResNet_vd(nn.Layer):
+    def __init__(self, layers=50, class_num=1000):
+        super(SE_ResNet_vd, self).__init__()
+
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        if layers == 18:
+            depth = [2, 2, 2, 2]
+        elif layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        elif layers == 200:
+            depth = [3, 12, 48, 3]
+        num_channels = [64, 256, 512,
+                        1024] if layers >= 50 else [64, 64, 128, 256]
+        num_filters = [64, 128, 256, 512]
+
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        if layers >= 50:
+            for block in range(len(depth)):
+                shortcut = False
+                for i in range(depth[block]):
+                    if layers in [101, 152] and block == 2:
+                        if i == 0:
+                            conv_name = "res" + str(block + 2) + "a"
+                        else:
+                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                    else:
+                        conv_name = "res" + str(block + 2) + chr(97 + i)
+                    bottleneck_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BottleneckBlock(
+                            num_channels=num_channels[block]
+                            if i == 0 else num_filters[block] * 4,
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0,
+                            name=conv_name))
+                    self.block_list.append(bottleneck_block)
+                    shortcut = True
+        else:
+            for block in range(len(depth)):
+                shortcut = False
+                for i in range(depth[block]):
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                    basic_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BasicBlock(
+                            num_channels=num_channels[block]
+                            if i == 0 else num_filters[block],
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0,
+                            name=conv_name))
+                    self.block_list.append(basic_block)
+                    shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
+            bias_attr=ParamAttr(name="fc6_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
+    model = SE_ResNet_vd(layers=18, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
+    return model
+
+
+def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
+    model = SE_ResNet_vd(layers=34, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
+    return model
+
+
+def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
+    model = SE_ResNet_vd(layers=50, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
+    return model

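The `_load_pretrained` helper, repeated verbatim in each of these files, accepts three forms for `pretrained`: `False` (random initialization), `True` (download the weights from `MODEL_URLS`), or a local `.pdparams` path. A sketch, with a hypothetical weights file:

from paddlers.models.ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet50_vd

model = SE_ResNet50_vd(pretrained=False)                          # random init
# model = SE_ResNet50_vd(pretrained=True)                         # fetch from MODEL_URLS
# model = SE_ResNet50_vd(pretrained="./se_resnet50_vd.pdparams")  # hypothetical local path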
+ 364 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/se_resnext.py

@@ -0,0 +1,364 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "SE_ResNeXt50_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
+    "SE_ResNeXt101_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
+    "SE_ResNeXt152_64x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None,
+                 data_format='NCHW'):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False,
+            data_format=data_format)
+        bn_name = name + '_bn'
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance',
+            data_layout=data_format)
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 cardinality,
+                 reduction_ratio,
+                 shortcut=True,
+                 if_first=False,
+                 name=None,
+                 data_format="NCHW"):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name='conv' + name + '_x1',
+            data_format=data_format)
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            groups=cardinality,
+            stride=stride,
+            act='relu',
+            name='conv' + name + '_x2',
+            data_format=data_format)
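+        # For cardinality 32 (the "32x4d" variants) conv2 expands to twice the
+        # group width; for cardinality 64 the widths in `num_filters` already
+        # include the expansion, so no doubling is applied.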
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            filter_size=1,
+            act=None,
+            name='conv' + name + '_x3',
+            data_format=data_format)
+        self.scale = SELayer(
+            num_channels=num_filters * 2 if cardinality == 32 else num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            reduction_ratio=reduction_ratio,
+            name='fc' + name,
+            data_format=data_format)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 2
+                if cardinality == 32 else num_filters,
+                filter_size=1,
+                stride=stride,
+                name='conv' + name + '_prj',
+                data_format=data_format)
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+        scale = self.scale(conv2)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=scale)
+        y = F.relu(y)
+        return y
+
+
+class SELayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 reduction_ratio,
+                 name=None,
+                 data_format="NCHW"):
+        super(SELayer, self).__init__()
+
+        self.data_format = data_format
+        self.pool2d_gap = AdaptiveAvgPool2D(1, data_format=self.data_format)
+
+        self._num_channels = num_channels
+
+        med_ch = int(num_channels / reduction_ratio)
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
+        self.relu = nn.ReLU()
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_filters,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, input):
+        pool = self.pool2d_gap(input)
+        if self.data_format == "NHWC":
+            pool = paddle.squeeze(pool, axis=[1, 2])
+        else:
+            pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = self.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = self.sigmoid(excitation)
+        if self.data_format == "NHWC":
+            excitation = paddle.unsqueeze(excitation, axis=[1, 2])
+        else:
+            excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = input * excitation
+        return out
+
+
+class ResNeXt(nn.Layer):
+    def __init__(self,
+                 layers=50,
+                 class_num=1000,
+                 cardinality=32,
+                 input_image_channel=3,
+                 data_format="NCHW"):
+        super(ResNeXt, self).__init__()
+
+        self.layers = layers
+        self.cardinality = cardinality
+        self.reduction_ratio = 16
+        self.data_format = data_format
+        self.input_image_channel = input_image_channel
+
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+        supported_cardinality = [32, 64]
+        assert cardinality in supported_cardinality, \
+            "supported cardinality is {} but input cardinality is {}" \
+            .format(supported_cardinality, cardinality)
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_filters = [128, 256, 512,
+                       1024] if cardinality == 32 else [256, 512, 1024, 2048]
+        if layers < 152:
+            self.conv = ConvBNLayer(
+                num_channels=self.input_image_channel,
+                num_filters=64,
+                filter_size=7,
+                stride=2,
+                act='relu',
+                name="conv1",
+                data_format=self.data_format)
+        else:
+            self.conv1_1 = ConvBNLayer(
+                num_channels=self.input_image_channel,
+                num_filters=64,
+                filter_size=3,
+                stride=2,
+                act='relu',
+                name="conv1",
+                data_format=self.data_format)
+            self.conv1_2 = ConvBNLayer(
+                num_channels=64,
+                num_filters=64,
+                filter_size=3,
+                stride=1,
+                act='relu',
+                name="conv2",
+                data_format=self.data_format)
+            self.conv1_3 = ConvBNLayer(
+                num_channels=64,
+                num_filters=128,
+                filter_size=3,
+                stride=1,
+                act='relu',
+                name="conv3",
+                data_format=self.data_format)
+
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
+
+        self.block_list = []
+        n = 1 if layers == 50 or layers == 101 else 3
+        for block in range(len(depth)):
+            n += 1
+            shortcut = False
+            for i in range(depth[block]):
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block] if i == 0 else
+                        num_filters[block] * int(64 // self.cardinality),
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        cardinality=self.cardinality,
+                        reduction_ratio=self.reduction_ratio,
+                        shortcut=shortcut,
+                        if_first=block == 0,
+                        name=str(n) + '_' + str(i + 1),
+                        data_format=self.data_format))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
+            bias_attr=ParamAttr(name="fc6_offset"))
+
+    def forward(self, inputs):
+        with paddle.static.amp.fp16_guard():
+            if self.data_format == "NHWC":
+                inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
+                inputs.stop_gradient = True
+            if self.layers < 152:
+                y = self.conv(inputs)
+            else:
+                y = self.conv1_1(inputs)
+                y = self.conv1_2(y)
+                y = self.conv1_3(y)
+            y = self.pool2d_max(y)
+            for i, block in enumerate(self.block_list):
+                y = block(y)
+            y = self.pool2d_avg(y)
+            y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+            y = self.out(y)
+            return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
+    return model
+
+
+def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=101, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt101_32x4d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt152_64x4d"],
+        use_ssld=use_ssld)
+    return model

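Unlike the neighboring files, this one threads `data_format` through every layer and wraps the forward pass in `paddle.static.amp.fp16_guard()` for mixed-precision training. A minimal NCHW smoke test (module path assumed):

import paddle
from paddlers.models.ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d

model = SE_ResNeXt50_32x4d(pretrained=False)  # data_format defaults to "NCHW"
out = model(paddle.randn([2, 3, 224, 224]))
print(out.shape)  # [2, 1000]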
+ 309 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/se_resnext_vd.py

@@ -0,0 +1,309 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+
+import math
+
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "SE_ResNeXt50_vd_32x4d":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
+    "SENet154_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        bn_name = name + '_bn'
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 cardinality,
+                 reduction_ratio,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name='conv' + name + '_x1')
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            groups=cardinality,
+            stride=stride,
+            act='relu',
+            name='conv' + name + '_x2')
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            filter_size=1,
+            act=None,
+            name='conv' + name + '_x3')
+        self.scale = SELayer(
+            num_channels=num_filters * 2 if cardinality == 32 else num_filters,
+            num_filters=num_filters * 2 if cardinality == 32 else num_filters,
+            reduction_ratio=reduction_ratio,
+            name='fc' + name)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 2
+                if cardinality == 32 else num_filters,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=False if if_first else True,
+                name='conv' + name + '_prj')
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+        scale = self.scale(conv2)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = paddle.add(x=short, y=scale)
+        y = F.relu(y)
+        return y
+
+
+class SELayer(nn.Layer):
+    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
+        super(SELayer, self).__init__()
+
+        self.pool2d_gap = AdaptiveAvgPool2D(1)
+
+        self._num_channels = num_channels
+
+        med_ch = int(num_channels / reduction_ratio)
+        stdv = 1.0 / math.sqrt(num_channels * 1.0)
+        self.squeeze = Linear(
+            num_channels,
+            med_ch,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
+        self.relu = nn.ReLU()
+        stdv = 1.0 / math.sqrt(med_ch * 1.0)
+        self.excitation = Linear(
+            med_ch,
+            num_filters,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, input):
+        pool = self.pool2d_gap(input)
+        pool = paddle.squeeze(pool, axis=[2, 3])
+        squeeze = self.squeeze(pool)
+        squeeze = self.relu(squeeze)
+        excitation = self.excitation(squeeze)
+        excitation = self.sigmoid(excitation)
+        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
+        out = paddle.multiply(input, excitation)
+        return out
+
+
+class ResNeXt(nn.Layer):
+    def __init__(self, layers=50, class_num=1000, cardinality=32):
+        super(ResNeXt, self).__init__()
+
+        self.layers = layers
+        self.cardinality = cardinality
+        self.reduction_ratio = 16
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+        supported_cardinality = [32, 64]
+        assert cardinality in supported_cardinality, \
+            "supported cardinality is {} but input cardinality is {}" \
+            .format(supported_cardinality, cardinality)
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
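+        # The deep 3x3 stem below ends at 128 channels, so the first stage
+        # consumes 128 input channels (the plain SE_ResNeXt stem yields 64).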
+        num_channels = [128, 256, 512, 1024]
+        num_filters = [128, 256, 512,
+                       1024] if cardinality == 32 else [256, 512, 1024, 2048]
+
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=64,
+            num_filters=128,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+
+        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.block_list = []
+        n = 1 if layers == 50 or layers == 101 else 3
+        for block in range(len(depth)):
+            n += 1
+            shortcut = False
+            for i in range(depth[block]):
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block] if i == 0 else
+                        num_filters[block] * int(64 // self.cardinality),
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        cardinality=self.cardinality,
+                        reduction_ratio=self.reduction_ratio,
+                        shortcut=shortcut,
+                        if_first=block == 0,
+                        name=str(n) + '_' + str(i + 1)))
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = num_channels[-1] * 2
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc6_weights"),
+            bias_attr=ParamAttr(name="fc6_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=50, cardinality=32, **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SE_ResNeXt50_vd_32x4d"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SENet154_vd(pretrained=False, use_ssld=False, **kwargs):
+    model = ResNeXt(layers=152, cardinality=64, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
+    return model

+ 362 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/shufflenet_v2.py

@@ -0,0 +1,362 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import ParamAttr, reshape, transpose, concat, split
+from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear
+from paddle.nn.initializer import KaimingNormal
+from paddle.nn.functional import swish
+
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ShuffleNetV2_x0_25":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
+    "ShuffleNetV2_x0_33":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
+    "ShuffleNetV2_x0_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
+    "ShuffleNetV2_x1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
+    "ShuffleNetV2_x1_5":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
+    "ShuffleNetV2_x2_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
+    "ShuffleNetV2_swish":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def channel_shuffle(x, groups):
+    batch_size, num_channels, height, width = x.shape[0:4]
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = reshape(
+        x=x, shape=[batch_size, groups, channels_per_group, height, width])
+
+    # transpose
+    x = transpose(x=x, perm=[0, 2, 1, 3, 4])
+
+    # flatten
+    x = reshape(x=x, shape=[batch_size, num_channels, height, width])
+    return x
+
+
+class ConvBNLayer(Layer):
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            padding,
+            groups=1,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=KaimingNormal(), name=name + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            out_channels,
+            param_attr=ParamAttr(name=name + "_bn_scale"),
+            bias_attr=ParamAttr(name=name + "_bn_offset"),
+            act=act,
+            moving_mean_name=name + "_bn_mean",
+            moving_variance_name=name + "_bn_variance")
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class InvertedResidual(Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 act="relu",
+                 name=None):
+        super(InvertedResidual, self).__init__()
+        self._conv_pw = ConvBNLayer(
+            in_channels=in_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
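+        # ShuffleNet v2 unit: split the channels in half, transform one half,
+        # concat with the untouched half, then shuffle so the two branches mix.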
+        x1, x2 = split(
+            inputs,
+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
+            axis=1)
+        x2 = self._conv_pw(x2)
+        x2 = self._conv_dw(x2)
+        x2 = self._conv_linear(x2)
+        out = concat([x1, x2], axis=1)
+        return channel_shuffle(out, 2)
+
+
+class InvertedResidualDS(Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 act="relu",
+                 name=None):
+        super(InvertedResidualDS, self).__init__()
+
+        # branch1
+        self._conv_dw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=in_channels,
+            act=None,
+            name='stage_' + name + '_conv4')
+        self._conv_linear_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv5')
+        # branch2
+        self._conv_pw_2 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv1')
+        self._conv_dw_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None,
+            name='stage_' + name + '_conv2')
+        self._conv_linear_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act,
+            name='stage_' + name + '_conv3')
+
+    def forward(self, inputs):
+        x1 = self._conv_dw_1(inputs)
+        x1 = self._conv_linear_1(x1)
+        x2 = self._conv_pw_2(inputs)
+        x2 = self._conv_dw_2(x2)
+        x2 = self._conv_linear_2(x2)
+        out = concat([x1, x2], axis=1)
+
+        return channel_shuffle(out, 2)
+
+
+class ShuffleNet(Layer):
+    def __init__(self, class_num=1000, scale=1.0, act="relu"):
+        super(ShuffleNet, self).__init__()
+        self.scale = scale
+        self.class_num = class_num
+        stage_repeats = [4, 8, 4]
+
+        if scale == 0.25:
+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
+        elif scale == 0.33:
+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
+        elif scale == 0.5:
+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif scale == 1.0:
+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif scale == 1.5:
+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif scale == 2.0:
+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+        else:
+            raise NotImplementedError("This scale size:[" + str(scale) +
+                                      "] is not implemented!")
+        # 1. conv1
+        self._conv1 = ConvBNLayer(
+            in_channels=3,
+            out_channels=stage_out_channels[1],
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            act=act,
+            name='stage1_conv')
+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # 2. bottleneck sequences
+        self._block_list = []
+        for stage_id, num_repeat in enumerate(stage_repeats):
+            for i in range(num_repeat):
+                if i == 0:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidualDS(
+                            in_channels=stage_out_channels[stage_id + 1],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=2,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                else:
+                    block = self.add_sublayer(
+                        name=str(stage_id + 2) + '_' + str(i + 1),
+                        sublayer=InvertedResidual(
+                            in_channels=stage_out_channels[stage_id + 2],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=1,
+                            act=act,
+                            name=str(stage_id + 2) + '_' + str(i + 1)))
+                self._block_list.append(block)
+        # 3. last_conv
+        self._last_conv = ConvBNLayer(
+            in_channels=stage_out_channels[-2],
+            out_channels=stage_out_channels[-1],
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act=act,
+            name='conv5')
+        # 4. pool
+        self._pool2d_avg = AdaptiveAvgPool2D(1)
+        self._out_c = stage_out_channels[-1]
+        # 5. fc
+        self._fc = Linear(
+            stage_out_channels[-1],
+            class_num,
+            weight_attr=ParamAttr(name='fc6_weights'),
+            bias_attr=ParamAttr(name='fc6_offset'))
+
+    def forward(self, inputs):
+        y = self._conv1(inputs)
+        y = self._max_pool(y)
+        for inv in self._block_list:
+            y = inv(y)
+        y = self._last_conv(y)
+        y = self._pool2d_avg(y)
+        y = paddle.flatten(y, start_axis=1, stop_axis=-1)
+        y = self._fc(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def ShuffleNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=0.25, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_x0_33(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=0.33, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=0.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_x1_0(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=1.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=1.5, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=2.0, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld)
+    return model
+
+
+def ShuffleNetV2_swish(pretrained=False, use_ssld=False, **kwargs):
+    model = ShuffleNet(scale=1.0, act="swish", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld)
+    return model

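`channel_shuffle` mixes channels across groups with a reshape-transpose-reshape: with four channels and two groups, the order [0, 1, 2, 3] becomes [0, 2, 1, 3]. A standalone check (module path assumed):

import paddle
from paddlers.models.ppcls.arch.backbone.model_zoo.shufflenet_v2 import channel_shuffle

x = paddle.arange(4, dtype="float32").reshape([1, 4, 1, 1])
y = channel_shuffle(x, groups=2)
print(y.flatten().tolist())  # [0.0, 2.0, 1.0, 3.0]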
+ 194 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/squeezenet.py

@@ -0,0 +1,194 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+from paddlers.models.ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "SqueezeNet1_0":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams",
+    "SqueezeNet1_1":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class MakeFireConv(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 padding=0,
+                 name=None):
+        super(MakeFireConv, self).__init__()
+        self._conv = Conv2D(
+            input_channels,
+            output_channels,
+            filter_size,
+            padding=padding,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=ParamAttr(name=name + "_offset"))
+
+    def forward(self, x):
+        x = self._conv(x)
+        x = F.relu(x)
+        return x
+
+
+class MakeFire(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 squeeze_channels,
+                 expand1x1_channels,
+                 expand3x3_channels,
+                 name=None):
+        super(MakeFire, self).__init__()
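+        # Fire module: a 1x1 "squeeze" conv followed by parallel 1x1 and 3x3
+        # "expand" convs whose outputs are concatenated along the channel axis.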
+        self._conv = MakeFireConv(
+            input_channels, squeeze_channels, 1, name=name + "_squeeze1x1")
+        self._conv_path1 = MakeFireConv(
+            squeeze_channels, expand1x1_channels, 1, name=name + "_expand1x1")
+        self._conv_path2 = MakeFireConv(
+            squeeze_channels,
+            expand3x3_channels,
+            3,
+            padding=1,
+            name=name + "_expand3x3")
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x1 = self._conv_path1(x)
+        x2 = self._conv_path2(x)
+        return paddle.concat([x1, x2], axis=1)
+
+
+class SqueezeNet(nn.Layer):
+    def __init__(self, version, class_num=1000):
+        super(SqueezeNet, self).__init__()
+        self.version = version
+
+        if self.version == "1.0":
+            self._conv = Conv2D(
+                3,
+                96,
+                7,
+                stride=2,
+                weight_attr=ParamAttr(name="conv1_weights"),
+                bias_attr=ParamAttr(name="conv1_offset"))
+            self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+            self._conv1 = MakeFire(96, 16, 64, 64, name="fire2")
+            self._conv2 = MakeFire(128, 16, 64, 64, name="fire3")
+            self._conv3 = MakeFire(128, 32, 128, 128, name="fire4")
+
+            self._conv4 = MakeFire(256, 32, 128, 128, name="fire5")
+            self._conv5 = MakeFire(256, 48, 192, 192, name="fire6")
+            self._conv6 = MakeFire(384, 48, 192, 192, name="fire7")
+            self._conv7 = MakeFire(384, 64, 256, 256, name="fire8")
+
+            self._conv8 = MakeFire(512, 64, 256, 256, name="fire9")
+        else:
+            self._conv = Conv2D(
+                3,
+                64,
+                3,
+                stride=2,
+                padding=1,
+                weight_attr=ParamAttr(name="conv1_weights"),
+                bias_attr=ParamAttr(name="conv1_offset"))
+            self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
+            self._conv1 = MakeFire(64, 16, 64, 64, name="fire2")
+            self._conv2 = MakeFire(128, 16, 64, 64, name="fire3")
+
+            self._conv3 = MakeFire(128, 32, 128, 128, name="fire4")
+            self._conv4 = MakeFire(256, 32, 128, 128, name="fire5")
+
+            self._conv5 = MakeFire(256, 48, 192, 192, name="fire6")
+            self._conv6 = MakeFire(384, 48, 192, 192, name="fire7")
+            self._conv7 = MakeFire(384, 64, 256, 256, name="fire8")
+            self._conv8 = MakeFire(512, 64, 256, 256, name="fire9")
+
+        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
+        self._conv9 = Conv2D(
+            512,
+            class_num,
+            1,
+            weight_attr=ParamAttr(name="conv10_weights"),
+            bias_attr=ParamAttr(name="conv10_offset"))
+        self._avg_pool = AdaptiveAvgPool2D(1)
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = F.relu(x)
+        x = self._pool(x)
+        if self.version == "1.0":
+            x = self._conv1(x)
+            x = self._conv2(x)
+            x = self._conv3(x)
+            x = self._pool(x)
+            x = self._conv4(x)
+            x = self._conv5(x)
+            x = self._conv6(x)
+            x = self._conv7(x)
+            x = self._pool(x)
+            x = self._conv8(x)
+        else:
+            x = self._conv1(x)
+            x = self._conv2(x)
+            x = self._pool(x)
+            x = self._conv3(x)
+            x = self._conv4(x)
+            x = self._pool(x)
+            x = self._conv5(x)
+            x = self._conv6(x)
+            x = self._conv7(x)
+            x = self._conv8(x)
+        x = self._drop(x)
+        x = self._conv9(x)
+        x = F.relu(x)
+        x = self._avg_pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SqueezeNet1_0(pretrained=False, use_ssld=False, **kwargs):
+    model = SqueezeNet(version="1.0", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld)
+    return model
+
+
+def SqueezeNet1_1(pretrained=False, use_ssld=False, **kwargs):
+    model = SqueezeNet(version="1.1", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld)
+    return model
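+
+
+# Usage sketch: build a randomly initialized SqueezeNet 1.1 for a
+# hypothetical 10-class task and run a dummy batch.
+#
+#     model = SqueezeNet1_1(pretrained=False, class_num=10)
+#     logits = model(paddle.rand([1, 3, 224, 224]))  # shape: [1, 10]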

+ 857 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/swin_transformer.py

@@ -0,0 +1,857 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/microsoft/Swin-Transformer
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.initializer import TruncatedNormal, Constant
+
+from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "SwinTransformer_tiny_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_small_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
+    "SwinTransformer_base_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window7_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams",
+    "SwinTransformer_large_patch4_window12_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class Mlp(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.reshape(
+        [B, H // window_size, window_size, W // window_size, window_size, C])
+    windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
+        [-1, window_size, window_size, C])
+    return windows
+
+
+def window_reverse(windows, window_size, H, W, C):
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+
+    Returns:
+        x: (B, H, W, C)
+    """
+    x = windows.reshape(
+        [-1, H // window_size, W // window_size, window_size, window_size, C])
+    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
+    return x
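+
+
+# Note: window_reverse inverts window_partition. For example, with x of shape
+# (B, 56, 56, C) and window_size=7, window_partition yields (64*B, 7, 7, C)
+# windows, and window_reverse(windows, 7, 56, 56, C) restores (B, 56, 56, C).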
+
+
+class WindowAttention(nn.Layer):
+    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+    """
+
+    def __init__(self,
+                 dim,
+                 window_size,
+                 num_heads,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        # define a parameter table of relative position bias
+        # 2*Wh-1 * 2*Ww-1, nH
+        self.relative_position_bias_table = self.create_parameter(
+            shape=((2 * window_size[0] - 1) * (2 * window_size[1] - 1),
+                   num_heads),
+            default_initializer=zeros_)
+        self.add_parameter("relative_position_bias_table",
+                           self.relative_position_bias_table)
+
+        # get pair-wise relative position index for each token inside the window
+        coords_h = paddle.arange(self.window_size[0])
+        coords_w = paddle.arange(self.window_size[1])
+        coords = paddle.stack(paddle.meshgrid(
+            [coords_h, coords_w]))  # 2, Wh, Ww
+        coords_flatten = paddle.flatten(coords, 1)  # 2, Wh*Ww
+
+        coords_flatten_1 = coords_flatten.unsqueeze(axis=2)
+        coords_flatten_2 = coords_flatten.unsqueeze(axis=1)
+        relative_coords = coords_flatten_1 - coords_flatten_2
+
+        relative_coords = relative_coords.transpose(
+            [1, 2, 0])  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size[
+            0] - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size[1] - 1
+        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        self.register_buffer("relative_position_index",
+                             relative_position_index)
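+        # Worked example: for window_size=(2, 2), relative coordinates lie in
+        # [-1, 1] x [-1, 1]; after the shift and row scaling above, the summed
+        # index covers 0..8, one row per entry of the (2*2-1)*(2*2-1)=9-row
+        # bias table.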
+
+        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+        trunc_normal_(self.relative_position_bias_table)
+        self.softmax = nn.Softmax(axis=-1)
+
+    def forward(self, x, mask=None):
+        """
+        Args:
+            x: input features with shape of (num_windows*B, N, C)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        """
+        B_, N, C = x.shape
+        qkv = self.qkv(x).reshape(
+            [B_, N, 3, self.num_heads, C // self.num_heads]).transpose(
+                [2, 0, 3, 1, 4])
+        q, k, v = qkv[0], qkv[1], qkv[2]
+
+        q = q * self.scale
+        attn = paddle.mm(q, k.transpose([0, 1, 3, 2]))
+
+        index = self.relative_position_index.reshape([-1])
+
+        relative_position_bias = paddle.index_select(
+            self.relative_position_bias_table, index)
+        relative_position_bias = relative_position_bias.reshape([
+            self.window_size[0] * self.window_size[1],
+            self.window_size[0] * self.window_size[1], -1
+        ])  # Wh*Ww,Wh*Ww,nH
+
+        relative_position_bias = relative_position_bias.transpose(
+            [2, 0, 1])  # nH, Wh*Ww, Wh*Ww
+        attn = attn + relative_position_bias.unsqueeze(0)
+
+        if mask is not None:
+            nW = mask.shape[0]
+            attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N
+                                 ]) + mask.unsqueeze(1).unsqueeze(0)
+            attn = attn.reshape([-1, self.num_heads, N, N])
+            attn = self.softmax(attn)
+        else:
+            attn = self.softmax(attn)
+
+        attn = self.attn_drop(attn)
+
+        # x = (attn @ v).transpose(1, 2).reshape([B_, N, C])
+        x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([B_, N, C])
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+
+    def extra_repr(self):
+        return "dim={}, window_size={}, num_heads={}".format(
+            self.dim, self.window_size, self.num_heads)
+
+    def flops(self, N):
+        # calculate flops for 1 window with token length of N
+        flops = 0
+        # qkv = self.qkv(x)
+        flops += N * self.dim * 3 * self.dim
+        # attn = (q @ k.transpose(-2, -1))
+        flops += self.num_heads * N * (self.dim // self.num_heads) * N
+        #  x = (attn @ v)
+        flops += self.num_heads * N * N * (self.dim // self.num_heads)
+        # x = self.proj(x)
+        flops += N * self.dim * self.dim
+        return flops
+
+
+class SwinTransformerBlock(nn.Layer):
+    r""" Swin Transformer Block.
+
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Layer, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+
+    def __init__(self,
+                 dim,
+                 input_resolution,
+                 num_heads,
+                 window_size=7,
+                 shift_size=0,
+                 mlp_ratio=4.,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        if min(self.input_resolution) <= self.window_size:
+            # if window size is larger than input resolution, we don't partition windows
+            self.shift_size = 0
+            self.window_size = min(self.input_resolution)
+        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
+
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim,
+            window_size=to_2tuple(self.window_size),
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop)
+
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop)
+
+        if self.shift_size > 0:
+            # calculate attention mask for SW-MSA
+            H, W = self.input_resolution
+            img_mask = paddle.zeros((1, H, W, 1))  # 1 H W 1
+            h_slices = (slice(0, -self.window_size),
+                        slice(-self.window_size, -self.shift_size),
+                        slice(-self.shift_size, None))
+            w_slices = (slice(0, -self.window_size),
+                        slice(-self.window_size, -self.shift_size),
+                        slice(-self.shift_size, None))
+            cnt = 0
+            for h in h_slices:
+                for w in w_slices:
+                    img_mask[:, h, w, :] = cnt
+                    cnt += 1
+
+            mask_windows = window_partition(
+                img_mask, self.window_size)  # nW, window_size, window_size, 1
+            mask_windows = mask_windows.reshape(
+                [-1, self.window_size * self.window_size])
+            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+
+            huns = -100.0 * paddle.ones_like(attn_mask)
+            attn_mask = huns * (attn_mask != 0).astype("float32")
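+            # Pairs of tokens that come from different shifted regions get a
+            # large negative bias (-100.0), so softmax drives their attention
+            # weights to ~0; pairs from the same region keep a bias of 0.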
+        else:
+            attn_mask = None
+
+        self.register_buffer("attn_mask", attn_mask)
+
+    def forward(self, x):
+        H, W = self.input_resolution
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+
+        shortcut = x
+        x = self.norm1(x)
+        x = x.reshape([B, H, W, C])
+
+        # cyclic shift
+        if self.shift_size > 0:
+            shifted_x = paddle.roll(
+                x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2))
+        else:
+            shifted_x = x
+
+        # partition windows
+        x_windows = window_partition(
+            shifted_x, self.window_size)  # nW*B, window_size, window_size, C
+        x_windows = x_windows.reshape(
+            [-1, self.window_size * self.window_size,
+             C])  # nW*B, window_size*window_size, C
+
+        # W-MSA/SW-MSA
+        attn_windows = self.attn(
+            x_windows, mask=self.attn_mask)  # nW*B, window_size*window_size, C
+
+        # merge windows
+        attn_windows = attn_windows.reshape(
+            [-1, self.window_size, self.window_size, C])
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W,
+                                   C)  # B H' W' C
+
+        # reverse cyclic shift
+        if self.shift_size > 0:
+            x = paddle.roll(
+                shifted_x,
+                shifts=(self.shift_size, self.shift_size),
+                axis=(1, 2))
+        else:
+            x = shifted_x
+        x = x.reshape([B, H * W, C])
+
+        # FFN
+        x = shortcut + self.drop_path(x)
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+
+        return x
+
+    def extra_repr(self):
+        return "dim={}, input_resolution={}, num_heads={}, window_size={}, shift_size={}, mlp_ratio={}".format(
+            self.dim, self.input_resolution, self.num_heads, self.window_size,
+            self.shift_size, self.mlp_ratio)
+
+    def flops(self):
+        flops = 0
+        H, W = self.input_resolution
+        # norm1
+        flops += self.dim * H * W
+        # W-MSA/SW-MSA
+        nW = H * W / self.window_size / self.window_size
+        flops += nW * self.attn.flops(self.window_size * self.window_size)
+        # mlp
+        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
+        # norm2
+        flops += self.dim * H * W
+        return flops
+
+
+class PatchMerging(nn.Layer):
+    r""" Patch Merging Layer.
+
+    Args:
+        input_resolution (tuple[int]): Resolution of input feature.
+        dim (int): Number of input channels.
+        norm_layer (nn.Layer, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+
+    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.input_resolution = input_resolution
+        self.dim = dim
+        self.reduction = nn.Linear(4 * dim, 2 * dim, bias_attr=False)
+        self.norm = norm_layer(4 * dim)
+
+    def forward(self, x):
+        """
+        x: B, H*W, C
+        """
+        H, W = self.input_resolution
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+        assert H % 2 == 0 and W % 2 == 0, "x size ({}*{}) is not even.".format(
+            H, W)
+
+        x = x.reshape([B, H, W, C])
+
+        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
+        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
+        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
+        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
+        x = paddle.concat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
+        x = x.reshape([B, H * W // 4, 4 * C])  # B H/2*W/2 4*C
+
+        x = self.norm(x)
+        x = self.reduction(x)
+
+        return x
+
+    def extra_repr(self):
+        return "input_resolution={}, dim={}".format(self.input_resolution,
+                                                    self.dim)
+
+    def flops(self):
+        H, W = self.input_resolution
+        flops = H * W * self.dim
+        flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
+        return flops
+
+
+class BasicLayer(nn.Layer):
+    """ A basic Swin Transformer layer for one stage.
+
+    Args:
+        dim (int): Number of input channels.
+        input_resolution (tuple[int]): Input resolution.
+        depth (int): Number of blocks.
+        num_heads (int): Number of attention heads.
+        window_size (int): Local window size.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+        norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+
+    def __init__(self,
+                 dim,
+                 input_resolution,
+                 depth,
+                 num_heads,
+                 window_size,
+                 mlp_ratio=4.,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 norm_layer=nn.LayerNorm,
+                 downsample=None,
+                 use_checkpoint=False):
+
+        super().__init__()
+        self.dim = dim
+        self.input_resolution = input_resolution
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+
+        # build blocks
+        self.blocks = nn.LayerList([
+            SwinTransformerBlock(
+                dim=dim,
+                input_resolution=input_resolution,
+                num_heads=num_heads,
+                window_size=window_size,
+                shift_size=0 if (i % 2 == 0) else window_size // 2,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop,
+                attn_drop=attn_drop,
+                drop_path=drop_path[i]
+                if isinstance(drop_path, list) else drop_path,
+                norm_layer=norm_layer) for i in range(depth)
+        ])
+
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(
+                input_resolution, dim=dim, norm_layer=norm_layer)
+        else:
+            self.downsample = None
+
+    def forward(self, x):
+        for blk in self.blocks:
+            x = blk(x)
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+
+    def extra_repr(self):
+        return "dim={}, input_resolution={}, depth={}".format(
+            self.dim, self.input_resolution, self.depth)
+
+    def flops(self):
+        flops = 0
+        for blk in self.blocks:
+            flops += blk.flops()
+        if self.downsample is not None:
+            flops += self.downsample.flops()
+        return flops
+
+
+class PatchEmbed(nn.Layer):
+    """ Image to Patch Embedding
+
+    Args:
+        img_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Layer, optional): Normalization layer. Default: None
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=4,
+                 in_chans=3,
+                 embed_dim=96,
+                 norm_layer=None):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [
+            img_size[0] // patch_size[0], img_size[1] // patch_size[1]
+        ]
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+
+        self.proj = nn.Conv2D(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+        if norm_layer is not None:
+            self.norm = norm_layer(embed_dim)
+        else:
+            self.norm = None
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        # TODO (littletomatodonkey): uncommenting the line below causes jit.save to fail
+        # assert [H, W] == self.img_size[:2], "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1])
+        x = self.proj(x)
+
+        x = x.flatten(2).transpose([0, 2, 1])  # B Ph*Pw C
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+
+    def flops(self):
+        Ho, Wo = self.patches_resolution
+        flops = Ho * Wo * self.embed_dim * self.in_chans * (
+            self.patch_size[0] * self.patch_size[1])
+        if self.norm is not None:
+            flops += Ho * Wo * self.embed_dim
+        return flops
+
+
+class SwinTransformer(nn.Layer):
+    """ Swin Transformer
+        A PaddlePaddle impl of: `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` -
+          https://arxiv.org/pdf/2103.14030
+
+    Args:
+        img_size (int | tuple(int)): Input image size. Default 224
+        patch_size (int | tuple(int)): Patch size. Default: 4
+        in_chans (int): Number of input image channels. Default: 3
+        class_num (int): Number of classes for the classification head. Default: 1000
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=4,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dim=96,
+                 depths=[2, 2, 6, 2],
+                 num_heads=[3, 6, 12, 24],
+                 window_size=7,
+                 mlp_ratio=4.,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.1,
+                 norm_layer=nn.LayerNorm,
+                 ape=False,
+                 patch_norm=True,
+                 use_checkpoint=False,
+                 **kwargs):
+        super(SwinTransformer, self).__init__()
+
+        self.num_classes = num_classes = class_num
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2**(self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = self.create_parameter(
+                shape=(1, num_patches, embed_dim), default_initializer=zeros_)
+            self.add_parameter("absolute_pos_embed", self.absolute_pos_embed)
+            trunc_normal_(self.absolute_pos_embed)
+
+        self.pos_drop = nn.Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = np.linspace(0, drop_path_rate,
+                          sum(depths)).tolist()  # stochastic depth decay rule
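+        # e.g. depths=[2, 2, 6, 2] with drop_path_rate=0.1 produces 12 values
+        # linearly spaced from 0.0 to 0.1, one per block across all stages.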
+
+        # build layers
+        self.layers = nn.LayerList()
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(
+                dim=int(embed_dim * 2**i_layer),
+                input_resolution=(patches_resolution[0] // (2**i_layer),
+                                  patches_resolution[1] // (2**i_layer)),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                norm_layer=norm_layer,
+                downsample=PatchMerging
+                if (i_layer < self.num_layers - 1) else None,
+                use_checkpoint=use_checkpoint)
+            self.layers.append(layer)
+
+        self.norm = norm_layer(self.num_features)
+        self.avgpool = nn.AdaptiveAvgPool1D(1)
+        self.head = nn.Linear(
+            self.num_features,
+            num_classes) if self.num_classes > 0 else nn.Identity()
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+
+        for layer in self.layers:
+            x = layer(x)
+
+        x = self.norm(x)  # B L C
+        x = self.avgpool(x.transpose([0, 2, 1]))  # B C 1
+        x = paddle.flatten(x, 1)
+        return x
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+    def flops(self):
+        flops = 0
+        flops += self.patch_embed.flops()
+        for _, layer in enumerate(self.layers):
+            flops += layer.flops()
+        flops += self.num_features * self.patches_resolution[
+            0] * self.patches_resolution[1] // (2**self.num_layers)
+        flops += self.num_features * self.num_classes
+        return flops
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SwinTransformer_tiny_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
+    model = SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        drop_path_rate=0.2,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_small_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 18, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_small_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_base_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
+    model = SwinTransformer(
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=7,
+        drop_path_rate=0.5,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_base_patch4_window12_384(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        img_size=384,
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=12,
+        drop_path_rate=0.5,  # NOTE: does not appear in the official code
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window12_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_large_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        embed_dim=192,
+        depths=[2, 2, 18, 2],
+        num_heads=[6, 12, 24, 48],
+        window_size=7,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_large_patch4_window12_384(pretrained=False,
+                                              use_ssld=False,
+                                              **kwargs):
+    model = SwinTransformer(
+        img_size=384,
+        embed_dim=192,
+        depths=[2, 2, 18, 2],
+        num_heads=[6, 12, 24, 48],
+        window_size=12,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window12_384"],
+        use_ssld=use_ssld)
+    return model
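+
+
+# Usage sketch: instantiate the tiny variant with random weights (no
+# download) and check the logit shape.
+#
+#     model = SwinTransformer_tiny_patch4_window7_224(pretrained=False)
+#     logits = model(paddle.rand([1, 3, 224, 224]))  # shape: [1, 1000]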

+ 385 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/tnt.py

@@ -0,0 +1,385 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch
+
+import math
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+
+from paddle.nn.initializer import TruncatedNormal, Constant
+
+from ppcls.arch.backbone.base.theseus_layer import Identity
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "TNT_small":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+trunc_normal_ = TruncatedNormal(std=.02)
+zeros_ = Constant(value=0.)
+ones_ = Constant(value=1.)
+
+
+def drop_path(x, drop_prob=0., training=False):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    The original name is misleading, as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = paddle.to_tensor(1 - drop_prob)
+    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
+    random_tensor = paddle.add(keep_prob, paddle.rand(shape, dtype=x.dtype))
+    random_tensor = paddle.floor(random_tensor)  # binarize
+    output = x.divide(keep_prob) * random_tensor
+    return output
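+
+
+# Example: with drop_prob=0.2 a sample survives with probability 0.8 and is
+# scaled by 1/0.8, so the expected value of the output equals the input.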
+
+
+class DropPath(nn.Layer):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
+    """
+
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+
+
+class Mlp(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Layer):
+    def __init__(self,
+                 dim,
+                 hidden_dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 attn_drop=0.,
+                 proj_drop=0.):
+        super().__init__()
+        self.hidden_dim = hidden_dim
+        self.num_heads = num_heads
+        head_dim = hidden_dim // num_heads
+        self.head_dim = head_dim
+        self.scale = head_dim**-0.5
+
+        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
+        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+    def forward(self, x):
+        B, N, C = x.shape
+        qk = self.qk(x).reshape(
+            (B, N, 2, self.num_heads, self.head_dim)).transpose(
+                (2, 0, 3, 1, 4))
+
+        q, k = qk[0], qk[1]
+        v = self.v(x).reshape(
+            (B, N, self.num_heads, x.shape[-1] // self.num_heads)).transpose(
+                (0, 2, 1, 3))
+
+        attn = paddle.matmul(q, k.transpose((0, 1, 3, 2))) * self.scale
+        attn = nn.functional.softmax(attn, axis=-1)
+        attn = self.attn_drop(attn)
+
+        x = paddle.matmul(attn, v)
+        x = x.transpose((0, 2, 1, 3)).reshape(
+            (B, N, x.shape[-1] * x.shape[-3]))
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 in_dim,
+                 num_pixel,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer=nn.LayerNorm):
+        super().__init__()
+        # Inner transformer
+        self.norm_in = norm_layer(in_dim)
+        self.attn_in = Attention(
+            in_dim,
+            in_dim,
+            num_heads=in_num_head,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)
+
+        self.norm_mlp_in = norm_layer(in_dim)
+        self.mlp_in = Mlp(in_features=in_dim,
+                          hidden_features=int(in_dim * 4),
+                          out_features=in_dim,
+                          act_layer=act_layer,
+                          drop=drop)
+
+        self.norm1_proj = norm_layer(in_dim)
+        self.proj = nn.Linear(in_dim * num_pixel, dim)
+        # Outer transformer
+        self.norm_out = norm_layer(dim)
+        self.attn_out = Attention(
+            dim,
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            attn_drop=attn_drop,
+            proj_drop=drop)
+
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+
+        self.norm_mlp = norm_layer(dim)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=int(dim * mlp_ratio),
+                       out_features=dim,
+                       act_layer=act_layer,
+                       drop=drop)
+
+    def forward(self, pixel_embed, patch_embed):
+        # inner
+        pixel_embed = paddle.add(
+            pixel_embed,
+            self.drop_path(self.attn_in(self.norm_in(pixel_embed))))
+        pixel_embed = paddle.add(
+            pixel_embed,
+            self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed))))
+        # outer
+        B, N, C = patch_embed.shape
+        norm1_proj = self.norm1_proj(pixel_embed)
+        norm1_proj = norm1_proj.reshape(
+            (B, N - 1, norm1_proj.shape[1] * norm1_proj.shape[2]))
+        patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:],
+                                        self.proj(norm1_proj))
+        patch_embed = paddle.add(
+            patch_embed,
+            self.drop_path(self.attn_out(self.norm_out(patch_embed))))
+        patch_embed = paddle.add(
+            patch_embed, self.drop_path(self.mlp(self.norm_mlp(patch_embed))))
+        return pixel_embed, patch_embed
+
+
+class PixelEmbed(nn.Layer):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 in_dim=48,
+                 stride=4):
+        super().__init__()
+        num_patches = (img_size // patch_size)**2
+        self.img_size = img_size
+        self.num_patches = num_patches
+        self.in_dim = in_dim
+        new_patch_size = math.ceil(patch_size / stride)
+        self.new_patch_size = new_patch_size
+
+        self.proj = nn.Conv2D(
+            in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride)
+
+    def forward(self, x, pixel_pos):
+        B, C, H, W = x.shape
+        assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."
+
+        x = self.proj(x)
+        x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
+        x = x.transpose((0, 2, 1)).reshape(
+            (-1, self.in_dim, self.new_patch_size, self.new_patch_size))
+        x = x + pixel_pos
+        x = x.reshape((-1, self.in_dim, self.new_patch_size *
+                       self.new_patch_size)).transpose((0, 2, 1))
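+        # e.g. img_size=224, patch_size=16, stride=4: proj gives
+        # (B, 48, 56, 56); unfold extracts 196 patches, and the final output
+        # is (B*196, 16, 48), i.e. 16 pixel tokens of width in_dim per patch.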
+        return x
+
+
+class TNT(nn.Layer):
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 embed_dim=768,
+                 in_dim=48,
+                 depth=12,
+                 num_heads=12,
+                 in_num_head=4,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer=nn.LayerNorm,
+                 first_stride=4,
+                 class_num=1000):
+        super().__init__()
+        self.class_num = class_num
+        # num_features for consistency with other models
+        self.num_features = self.embed_dim = embed_dim
+
+        self.pixel_embed = PixelEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            in_dim=in_dim,
+            stride=first_stride)
+        num_patches = self.pixel_embed.num_patches
+        self.num_patches = num_patches
+        new_patch_size = self.pixel_embed.new_patch_size
+        num_pixel = new_patch_size**2
+
+        self.norm1_proj = norm_layer(num_pixel * in_dim)
+        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
+        self.norm2_proj = norm_layer(embed_dim)
+
+        self.cls_token = self.create_parameter(
+            shape=(1, 1, embed_dim), default_initializer=zeros_)
+        self.add_parameter("cls_token", self.cls_token)
+
+        self.patch_pos = self.create_parameter(
+            shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_)
+        self.add_parameter("patch_pos", self.patch_pos)
+
+        self.pixel_pos = self.create_parameter(
+            shape=(1, in_dim, new_patch_size, new_patch_size),
+            default_initializer=zeros_)
+        self.add_parameter("pixel_pos", self.pixel_pos)
+
+        self.pos_drop = nn.Dropout(p=drop_rate)
+
+        # stochastic depth decay rule
+        dpr = np.linspace(0, drop_path_rate, depth)
+
+        blocks = []
+        for i in range(depth):
+            blocks.append(
+                Block(
+                    dim=embed_dim,
+                    in_dim=in_dim,
+                    num_pixel=num_pixel,
+                    num_heads=num_heads,
+                    in_num_head=in_num_head,
+                    mlp_ratio=mlp_ratio,
+                    qkv_bias=qkv_bias,
+                    drop=drop_rate,
+                    attn_drop=attn_drop_rate,
+                    drop_path=dpr[i],
+                    norm_layer=norm_layer))
+        self.blocks = nn.LayerList(blocks)
+        self.norm = norm_layer(embed_dim)
+
+        if class_num > 0:
+            self.head = nn.Linear(embed_dim, class_num)
+
+        trunc_normal_(self.cls_token)
+        trunc_normal_(self.patch_pos)
+        trunc_normal_(self.pixel_pos)
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        B = paddle.shape(x)[0]
+        pixel_embed = self.pixel_embed(x, self.pixel_pos)
+
+        patch_embed = self.norm2_proj(
+            self.proj(
+                self.norm1_proj(
+                    pixel_embed.reshape((-1, self.num_patches, pixel_embed.
+                                         shape[-1] * pixel_embed.shape[-2])))))
+        patch_embed = paddle.concat(
+            (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
+        patch_embed = patch_embed + self.patch_pos
+        patch_embed = self.pos_drop(patch_embed)
+
+        for blk in self.blocks:
+            pixel_embed, patch_embed = blk(pixel_embed, patch_embed)
+
+        patch_embed = self.norm(patch_embed)
+        return patch_embed[:, 0]
+
+    def forward(self, x):
+        x = self.forward_features(x)
+
+        if self.class_num > 0:
+            x = self.head(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def TNT_small(pretrained=False, **kwargs):
+    model = TNT(patch_size=16,
+                embed_dim=384,
+                in_dim=24,
+                depth=12,
+                num_heads=6,
+                in_num_head=4,
+                qkv_bias=False,
+                **kwargs)
+    _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
+    return model
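+
+
+# Usage sketch: TNT_small with random weights on a dummy batch.
+#
+#     model = TNT_small(pretrained=False)
+#     logits = model(paddle.rand([1, 3, 224, 224]))  # shape: [1, 1000]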

+ 458 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/vision_transformer.py

@@ -0,0 +1,458 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
+
+from collections.abc import Callable
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ViT_small_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_small_patch16_224_pretrained.pdparams",
+    "ViT_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_224_pretrained.pdparams",
+    "ViT_base_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_384_pretrained.pdparams",
+    "ViT_base_patch32_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch32_384_pretrained.pdparams",
+    "ViT_large_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_224_pretrained.pdparams",
+    "ViT_large_patch16_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams",
+    "ViT_large_patch32_384":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+trunc_normal_ = TruncatedNormal(std=.02)
+normal_ = Normal
+zeros_ = Constant(value=0.)
+ones_ = Constant(value=1.)
+
+
+def to_2tuple(x):
+    return tuple([x] * 2)
+
+
+def drop_path(x, drop_prob=0., training=False):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+    The original name is misleading, as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = paddle.to_tensor(1 - drop_prob)
+    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
+    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
+    random_tensor = paddle.floor(random_tensor)  # binarize
+    output = x.divide(keep_prob) * random_tensor
+    return output
+
+
+class DropPath(nn.Layer):
+    """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
+    """
+
+    def __init__(self, drop_prob=None):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+
+
+class Identity(nn.Layer):
+    def __init__(self):
+        super(Identity, self).__init__()
+
+    def forward(self, input):
+        return input
+
+
+class Mlp(nn.Layer):
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer=nn.GELU,
+                 drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+class Attention(nn.Layer):
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 attn_drop=0.,
+                 proj_drop=0.):
+        super().__init__()
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim**-0.5
+
+        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+
+    def forward(self, x):
+        # B= paddle.shape(x)[0]
+        N, C = x.shape[1:]
+        qkv = self.qkv(x).reshape((-1, N, 3, self.num_heads, C //
+                                   self.num_heads)).transpose((2, 0, 3, 1, 4))
+        q, k, v = qkv[0], qkv[1], qkv[2]
+
+        attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale
+        attn = nn.functional.softmax(attn, axis=-1)
+        attn = self.attn_drop(attn)
+
+        x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C))
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+
+
+class Block(nn.Layer):
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 act_layer=nn.GELU,
+                 norm_layer='nn.LayerNorm',
+                 epsilon=1e-5):
+        super().__init__()
+        if isinstance(norm_layer, str):
+            self.norm1 = eval(norm_layer)(dim, epsilon=epsilon)
+        elif isinstance(norm_layer, Callable):
+            self.norm1 = norm_layer(dim)
+        else:
+            raise TypeError(
+                "The norm_layer must be str or paddle.nn.layer.Layer class")
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop)
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        if isinstance(norm_layer, str):
+            self.norm2 = eval(norm_layer)(dim, epsilon=epsilon)
+        elif isinstance(norm_layer, Callable):
+            self.norm2 = norm_layer(dim)
+        else:
+            raise TypeError(
+                "The norm_layer must be str or paddle.nn.layer.Layer class")
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=mlp_hidden_dim,
+                       act_layer=act_layer,
+                       drop=drop)
+
+    def forward(self, x):
+        x = x + self.drop_path(self.attn(self.norm1(x)))
+        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        return x
+
+
+class PatchEmbed(nn.Layer):
+    """ Image to Patch Embedding
+    """
+
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        num_patches = (img_size[1] // patch_size[1]) * \
+            (img_size[0] // patch_size[0])
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.num_patches = num_patches
+
+        self.proj = nn.Conv2D(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        assert H == self.img_size[0] and W == self.img_size[1], \
+            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
+
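+        # (B, C, H, W) -> proj -> (B, embed_dim, H/P, W/P)
+        # -> flatten/transpose -> (B, num_patches, embed_dim)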
+        x = self.proj(x).flatten(2).transpose((0, 2, 1))
+        return x
+
+
+class VisionTransformer(nn.Layer):
+    """ Vision Transformer with support for patch input
+    """
+
+    def __init__(self,
+                 img_size=224,
+                 patch_size=16,
+                 in_chans=3,
+                 class_num=1000,
+                 embed_dim=768,
+                 depth=12,
+                 num_heads=12,
+                 mlp_ratio=4,
+                 qkv_bias=False,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 norm_layer='nn.LayerNorm',
+                 epsilon=1e-5,
+                 **kwargs):
+        super().__init__()
+        self.class_num = class_num
+
+        self.num_features = self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            embed_dim=embed_dim)
+        num_patches = self.patch_embed.num_patches
+
+        self.pos_embed = self.create_parameter(
+            shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_)
+        self.add_parameter("pos_embed", self.pos_embed)
+        self.cls_token = self.create_parameter(
+            shape=(1, 1, embed_dim), default_initializer=zeros_)
+        self.add_parameter("cls_token", self.cls_token)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+
+        dpr = np.linspace(0, drop_path_rate, depth)
+
+        self.blocks = nn.LayerList([
+            Block(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[i],
+                norm_layer=norm_layer,
+                epsilon=epsilon) for i in range(depth)
+        ])
+
+        self.norm = eval(norm_layer)(embed_dim, epsilon=epsilon)
+
+        # Classifier head
+        self.head = nn.Linear(embed_dim,
+                              class_num) if class_num > 0 else Identity()
+
+        trunc_normal_(self.pos_embed)
+        trunc_normal_(self.cls_token)
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        # use paddle.shape so the batch dim stays dynamic for static-graph export
+        B = paddle.shape(x)[0]
+        x = self.patch_embed(x)
+        cls_tokens = self.cls_token.expand((B, -1, -1))
+        x = paddle.concat((cls_tokens, x), axis=1)
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+        for blk in self.blocks:
+            x = blk(x)
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def ViT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=768,
+        depth=8,
+        num_heads=8,
+        mlp_ratio=3,
+        qk_scale=768**-0.5,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_small_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_base_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        img_size=384,
+        patch_size=16,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_base_patch16_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_base_patch32_384(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        img_size=384,
+        patch_size=32,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_base_patch32_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        patch_size=16,
+        embed_dim=1024,
+        depth=24,
+        num_heads=16,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_large_patch16_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_large_patch16_384(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        img_size=384,
+        patch_size=16,
+        embed_dim=1024,
+        depth=24,
+        num_heads=16,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_large_patch16_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def ViT_large_patch32_384(pretrained=False, use_ssld=False, **kwargs):
+    model = VisionTransformer(
+        img_size=384,
+        patch_size=32,
+        embed_dim=1024,
+        depth=24,
+        num_heads=16,
+        mlp_ratio=4,
+        qkv_bias=True,
+        epsilon=1e-6,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["ViT_large_patch32_384"],
+        use_ssld=use_ssld)
+    return model
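
A hedged usage sketch for these builders (annotation, not part of the patch; `pretrained=False` skips the weight download). It also checks the PatchEmbed arithmetic: at 224x224 with 16x16 patches the sequence length is (224 // 16) ** 2 = 196, plus one cls token.

import paddle

model = ViT_base_patch16_224(pretrained=False, class_num=1000)
model.eval()
x = paddle.randn([1, 3, 224, 224])
print(model.patch_embed(x).shape)   # [1, 196, 768]: 196 = (224 // 16) ** 2 patches
print(model(x).shape)               # [1, 1000] logits from the cls-token feature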

+ 377 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/xception.py

@@ -0,0 +1,377 @@
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "Xception41":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams",
+    "Xception65":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams",
+    "Xception71":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        bn_name = "bn_" + name
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class SeparableConv(nn.Layer):
+    def __init__(self, input_channels, output_channels, stride=1, name=None):
+        super(SeparableConv, self).__init__()
+
+        self._pointwise_conv = ConvBNLayer(
+            input_channels, output_channels, 1, name=name + "_sep")
+        self._depthwise_conv = ConvBNLayer(
+            output_channels,
+            output_channels,
+            3,
+            stride=stride,
+            groups=output_channels,
+            name=name + "_dw")
+
+    def forward(self, inputs):
+        x = self._pointwise_conv(inputs)
+        x = self._depthwise_conv(x)
+        return x
+
+
+class EntryFlowBottleneckBlock(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 stride=2,
+                 name=None,
+                 relu_first=False):
+        super(EntryFlowBottleneckBlock, self).__init__()
+        self.relu_first = relu_first
+
+        self._short = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=1,
+            stride=stride,
+            padding=0,
+            weight_attr=ParamAttr(name + "_branch1_weights"),
+            bias_attr=False)
+        self._conv1 = SeparableConv(
+            input_channels,
+            output_channels,
+            stride=1,
+            name=name + "_branch2a_weights")
+        self._conv2 = SeparableConv(
+            output_channels,
+            output_channels,
+            stride=1,
+            name=name + "_branch2b_weights")
+        self._pool = MaxPool2D(kernel_size=3, stride=stride, padding=1)
+
+    def forward(self, inputs):
+        conv0 = inputs
+        short = self._short(inputs)
+        if self.relu_first:
+            conv0 = F.relu(conv0)
+        conv1 = self._conv1(conv0)
+        conv2 = F.relu(conv1)
+        conv2 = self._conv2(conv2)
+        pool = self._pool(conv2)
+        return paddle.add(x=short, y=pool)
+
+
+class EntryFlow(nn.Layer):
+    def __init__(self, block_num=3):
+        super(EntryFlow, self).__init__()
+
+        name = "entry_flow"
+        self.block_num = block_num
+        self._conv1 = ConvBNLayer(
+            3, 32, 3, stride=2, act="relu", name=name + "_conv1")
+        self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2")
+        if block_num == 3:
+            self._conv_0 = EntryFlowBottleneckBlock(
+                64, 128, stride=2, name=name + "_0", relu_first=False)
+            self._conv_1 = EntryFlowBottleneckBlock(
+                128, 256, stride=2, name=name + "_1", relu_first=True)
+            self._conv_2 = EntryFlowBottleneckBlock(
+                256, 728, stride=2, name=name + "_2", relu_first=True)
+        elif block_num == 5:
+            self._conv_0 = EntryFlowBottleneckBlock(
+                64, 128, stride=2, name=name + "_0", relu_first=False)
+            self._conv_1 = EntryFlowBottleneckBlock(
+                128, 256, stride=1, name=name + "_1", relu_first=True)
+            self._conv_2 = EntryFlowBottleneckBlock(
+                256, 256, stride=2, name=name + "_2", relu_first=True)
+            self._conv_3 = EntryFlowBottleneckBlock(
+                256, 728, stride=1, name=name + "_3", relu_first=True)
+            self._conv_4 = EntryFlowBottleneckBlock(
+                728, 728, stride=2, name=name + "_4", relu_first=True)
+        else:
+            raise ValueError(
+                "EntryFlow block_num only supports 3 or 5, but got {}".format(
+                    block_num))
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+
+        if self.block_num == 3:
+            x = self._conv_0(x)
+            x = self._conv_1(x)
+            x = self._conv_2(x)
+        elif self.block_num == 5:
+            x = self._conv_0(x)
+            x = self._conv_1(x)
+            x = self._conv_2(x)
+            x = self._conv_3(x)
+            x = self._conv_4(x)
+        return x
+
+
+class MiddleFlowBottleneckBlock(nn.Layer):
+    def __init__(self, input_channels, output_channels, name):
+        super(MiddleFlowBottleneckBlock, self).__init__()
+
+        self._conv_0 = SeparableConv(
+            input_channels,
+            output_channels,
+            stride=1,
+            name=name + "_branch2a_weights")
+        self._conv_1 = SeparableConv(
+            output_channels,
+            output_channels,
+            stride=1,
+            name=name + "_branch2b_weights")
+        self._conv_2 = SeparableConv(
+            output_channels,
+            output_channels,
+            stride=1,
+            name=name + "_branch2c_weights")
+
+    def forward(self, inputs):
+        conv0 = F.relu(inputs)
+        conv0 = self._conv_0(conv0)
+        conv1 = F.relu(conv0)
+        conv1 = self._conv_1(conv1)
+        conv2 = F.relu(conv1)
+        conv2 = self._conv_2(conv2)
+        return paddle.add(x=inputs, y=conv2)
+
+
+class MiddleFlow(nn.Layer):
+    def __init__(self, block_num=8):
+        super(MiddleFlow, self).__init__()
+
+        self.block_num = block_num
+        self._conv_0 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_0")
+        self._conv_1 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_1")
+        self._conv_2 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_2")
+        self._conv_3 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_3")
+        self._conv_4 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_4")
+        self._conv_5 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_5")
+        self._conv_6 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_6")
+        self._conv_7 = MiddleFlowBottleneckBlock(
+            728, 728, name="middle_flow_7")
+        if block_num == 16:
+            self._conv_8 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_8")
+            self._conv_9 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_9")
+            self._conv_10 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_10")
+            self._conv_11 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_11")
+            self._conv_12 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_12")
+            self._conv_13 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_13")
+            self._conv_14 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_14")
+            self._conv_15 = MiddleFlowBottleneckBlock(
+                728, 728, name="middle_flow_15")
+
+    def forward(self, inputs):
+        x = self._conv_0(inputs)
+        x = self._conv_1(x)
+        x = self._conv_2(x)
+        x = self._conv_3(x)
+        x = self._conv_4(x)
+        x = self._conv_5(x)
+        x = self._conv_6(x)
+        x = self._conv_7(x)
+        if self.block_num == 16:
+            x = self._conv_8(x)
+            x = self._conv_9(x)
+            x = self._conv_10(x)
+            x = self._conv_11(x)
+            x = self._conv_12(x)
+            x = self._conv_13(x)
+            x = self._conv_14(x)
+            x = self._conv_15(x)
+        return x
+
+
+class ExitFlowBottleneckBlock(nn.Layer):
+    def __init__(self, input_channels, output_channels1, output_channels2,
+                 name):
+        super(ExitFlowBottleneckBlock, self).__init__()
+
+        self._short = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels2,
+            kernel_size=1,
+            stride=2,
+            padding=0,
+            weight_attr=ParamAttr(name + "_branch1_weights"),
+            bias_attr=False)
+        self._conv_1 = SeparableConv(
+            input_channels,
+            output_channels1,
+            stride=1,
+            name=name + "_branch2a_weights")
+        self._conv_2 = SeparableConv(
+            output_channels1,
+            output_channels2,
+            stride=1,
+            name=name + "_branch2b_weights")
+        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+    def forward(self, inputs):
+        short = self._short(inputs)
+        conv0 = F.relu(inputs)
+        conv1 = self._conv_1(conv0)
+        conv2 = F.relu(conv1)
+        conv2 = self._conv_2(conv2)
+        pool = self._pool(conv2)
+        return paddle.add(x=short, y=pool)
+
+
+class ExitFlow(nn.Layer):
+    def __init__(self, class_num):
+        super(ExitFlow, self).__init__()
+
+        name = "exit_flow"
+
+        self._conv_0 = ExitFlowBottleneckBlock(
+            728, 728, 1024, name=name + "_1")
+        self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2")
+        self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3")
+        self._pool = AdaptiveAvgPool2D(1)
+        stdv = 1.0 / math.sqrt(2048 * 1.0)
+        self._out = Linear(
+            2048,
+            class_num,
+            weight_attr=ParamAttr(
+                name="fc_weights", initializer=Uniform(-stdv, stdv)),
+            bias_attr=ParamAttr(name="fc_offset"))
+
+    def forward(self, inputs):
+        conv0 = self._conv_0(inputs)
+        conv1 = self._conv_1(conv0)
+        conv1 = F.relu(conv1)
+        conv2 = self._conv_2(conv1)
+        conv2 = F.relu(conv2)
+        pool = self._pool(conv2)
+        pool = paddle.flatten(pool, start_axis=1, stop_axis=-1)
+        out = self._out(pool)
+        return out
+
+
+class Xception(nn.Layer):
+    def __init__(self,
+                 entry_flow_block_num=3,
+                 middle_flow_block_num=8,
+                 class_num=1000):
+        super(Xception, self).__init__()
+        self.entry_flow_block_num = entry_flow_block_num
+        self.middle_flow_block_num = middle_flow_block_num
+        self._entry_flow = EntryFlow(entry_flow_block_num)
+        self._middle_flow = MiddleFlow(middle_flow_block_num)
+        self._exit_flow = ExitFlow(class_num)
+
+    def forward(self, inputs):
+        x = self._entry_flow(inputs)
+        x = self._middle_flow(x)
+        x = self._exit_flow(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def Xception41(pretrained=False, use_ssld=False, **kwargs):
+    model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld)
+    return model
+
+
+def Xception65(pretrained=False, use_ssld=False, **kwargs):
+    model = Xception(
+        entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
+    return model
+
+
+def Xception71(pretrained=False, use_ssld=False, **kwargs):
+    model = Xception(
+        entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
+    return model
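
The three presets map directly onto the block counts: Xception41 = entry 3 / middle 8, Xception65 = entry 3 / middle 16, Xception71 = entry 5 / middle 16. A minimal forward pass as a sketch (pretrained weights skipped; 299x299 is the conventional Xception input size, though the adaptive pooling accepts other resolutions):

import paddle

model = Xception41(pretrained=False, class_num=1000)
out = model(paddle.randn([1, 3, 299, 299]))
print(out.shape)   # [1, 1000]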

+ 421 - 0
paddlers/models/ppcls/arch/backbone/model_zoo/xception_deeplab.py

@@ -0,0 +1,421 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "Xception41_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
+    "Xception65_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def check_data(data, number):
+    if isinstance(data, int):
+        return [data] * number
+    assert len(data) == number
+    return data
+
+
+def check_stride(s, os):
+    return s <= os
+
+
+def check_points(count, points):
+    if points is None:
+        return False
+    if isinstance(points, list):
+        return count in points
+    return count == points
+
+
+def gen_bottleneck_params(backbone='xception_65'):
+    if backbone == 'xception_65':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_41':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (8, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_71':
+        bottleneck_params = {
+            "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    else:
+        raise ValueError(
+            "xception backbone only supports xception_41/xception_65/xception_71"
+        )
+    return bottleneck_params
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            weight_attr=ParamAttr(name=name + "/weights"),
+            bias_attr=False)
+        self._bn = BatchNorm(
+            num_channels=output_channels,
+            act=act,
+            epsilon=1e-3,
+            momentum=0.99,
+            param_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"),
+            moving_mean_name=name + "/BatchNorm/moving_mean",
+            moving_variance_name=name + "/BatchNorm/moving_variance")
+
+    def forward(self, inputs):
+        return self._bn(self._conv(inputs))
+
+
+class Seperate_Conv(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 stride,
+                 filter,
+                 dilation=1,
+                 act=None,
+                 name=None):
+        super(Seperate_Conv, self).__init__()
+
+        self._conv1 = Conv2D(
+            in_channels=input_channels,
+            out_channels=input_channels,
+            kernel_size=filter,
+            stride=stride,
+            groups=input_channels,
+            padding=(filter // 2) * dilation,
+            dilation=dilation,
+            weight_attr=ParamAttr(name=name + "/depthwise/weights"),
+            bias_attr=False)
+        self._bn1 = BatchNorm(
+            input_channels,
+            act=act,
+            epsilon=1e-3,
+            momentum=0.99,
+            param_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"),
+            moving_mean_name=name + "/depthwise/BatchNorm/moving_mean",
+            moving_variance_name=name + "/depthwise/BatchNorm/moving_variance")
+        self._conv2 = Conv2D(
+            input_channels,
+            output_channels,
+            1,
+            stride=1,
+            groups=1,
+            padding=0,
+            weight_attr=ParamAttr(name=name + "/pointwise/weights"),
+            bias_attr=False)
+        self._bn2 = BatchNorm(
+            output_channels,
+            act=act,
+            epsilon=1e-3,
+            momentum=0.99,
+            param_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"),
+            moving_mean_name=name + "/pointwise/BatchNorm/moving_mean",
+            moving_variance_name=name + "/pointwise/BatchNorm/moving_variance")
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._bn1(x)
+        x = self._conv2(x)
+        x = self._bn2(x)
+        return x
+
+
+class Xception_Block(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 strides=1,
+                 filter_size=3,
+                 dilation=1,
+                 skip_conv=True,
+                 has_skip=True,
+                 activation_fn_in_separable_conv=False,
+                 name=None):
+        super(Xception_Block, self).__init__()
+
+        repeat_number = 3
+        output_channels = check_data(output_channels, repeat_number)
+        filter_size = check_data(filter_size, repeat_number)
+        strides = check_data(strides, repeat_number)
+
+        self.has_skip = has_skip
+        self.skip_conv = skip_conv
+        self.activation_fn_in_separable_conv = activation_fn_in_separable_conv
+        if not activation_fn_in_separable_conv:
+            self._conv1 = Seperate_Conv(
+                input_channels,
+                output_channels[0],
+                stride=strides[0],
+                filter=filter_size[0],
+                dilation=dilation,
+                name=name + "/separable_conv1")
+            self._conv2 = Seperate_Conv(
+                output_channels[0],
+                output_channels[1],
+                stride=strides[1],
+                filter=filter_size[1],
+                dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                dilation=dilation,
+                name=name + "/separable_conv3")
+        else:
+            self._conv1 = Seperate_Conv(
+                input_channels,
+                output_channels[0],
+                stride=strides[0],
+                filter=filter_size[0],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv1")
+            self._conv2 = Seperate_Conv(
+                output_channels[0],
+                output_channels[1],
+                stride=strides[1],
+                filter=filter_size[1],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv3")
+
+        if has_skip and skip_conv:
+            self._short = ConvBNLayer(
+                input_channels,
+                output_channels[-1],
+                1,
+                stride=strides[-1],
+                padding=0,
+                name=name + "/shortcut")
+
+    def forward(self, inputs):
+        if not self.activation_fn_in_separable_conv:
+            x = F.relu(inputs)
+            x = self._conv1(x)
+            x = F.relu(x)
+            x = self._conv2(x)
+            x = F.relu(x)
+            x = self._conv3(x)
+        else:
+            x = self._conv1(inputs)
+            x = self._conv2(x)
+            x = self._conv3(x)
+        if self.has_skip:
+            if self.skip_conv:
+                skip = self._short(inputs)
+            else:
+                skip = inputs
+            return paddle.add(x, skip)
+        else:
+            return x
+
+
+class XceptionDeeplab(nn.Layer):
+    def __init__(self, backbone, class_num=1000):
+        super(XceptionDeeplab, self).__init__()
+
+        bottleneck_params = gen_bottleneck_params(backbone)
+        self.backbone = backbone
+
+        self._conv1 = ConvBNLayer(
+            3,
+            32,
+            3,
+            stride=2,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv1")
+        self._conv2 = ConvBNLayer(
+            32,
+            64,
+            3,
+            stride=1,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv2")
+
+        self.block_num = bottleneck_params["entry_flow"][0]
+        self.strides = bottleneck_params["entry_flow"][1]
+        self.chns = bottleneck_params["entry_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+
+        self.entry_flow = []
+        self.middle_flow = []
+
+        self.stride = 2
+        self.output_stride = 32
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/entry_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=64 if i == 0 else self.chns[i - 1],
+                    output_channels=self.chns[i],
+                    strides=[1, 1, stride],  # use the per-block stride computed above
+                    name=self.backbone + "/entry_flow/block" + str(i + 1)))
+            self.entry_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+
+        self.block_num = bottleneck_params["middle_flow"][0]
+        self.strides = bottleneck_params["middle_flow"][1]
+        self.chns = bottleneck_params["middle_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/middle_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=728,
+                    output_channels=728,
+                    strides=[1, 1, self.strides[i]],
+                    skip_conv=False,
+                    name=self.backbone + "/middle_flow/block" + str(i + 1)))
+            self.middle_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+
+        self.block_num = bottleneck_params["exit_flow"][0]
+        self.strides = bottleneck_params["exit_flow"][1]
+        self.chns = bottleneck_params["exit_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+        stride = self.strides[0] if check_stride(s * self.strides[0],
+                                                 self.output_stride) else 1
+        self._exit_flow_1 = Xception_Block(
+            728,
+            self.chns[0], [1, 1, stride],
+            name=self.backbone + "/exit_flow/block1")
+        s = s * stride
+        stride = self.strides[1] if check_stride(s * self.strides[1],
+                                                 self.output_stride) else 1
+        self._exit_flow_2 = Xception_Block(
+            self.chns[0][-1],
+            self.chns[1], [1, 1, stride],
+            dilation=2,
+            has_skip=False,
+            activation_fn_in_separable_conv=True,
+            name=self.backbone + "/exit_flow/block2")
+        s = s * stride
+
+        self.stride = s
+
+        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
+        self._pool = AdaptiveAvgPool2D(1)
+        self._fc = Linear(
+            self.chns[1][-1],
+            class_num,
+            weight_attr=ParamAttr(name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_bias"))
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+        for ef in self.entry_flow:
+            x = ef(x)
+        for mf in self.middle_flow:
+            x = mf(x)
+        x = self._exit_flow_1(x)
+        x = self._exit_flow_2(x)
+        x = self._drop(x)
+        x = self._pool(x)
+        x = paddle.squeeze(x, axis=[2, 3])
+        x = self._fc(x)
+        return x
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def Xception41_deeplab(pretrained=False, use_ssld=False, **kwargs):
+    model = XceptionDeeplab('xception_41', **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld)
+    return model
+
+
+def Xception65_deeplab(pretrained=False, use_ssld=False, **kwargs):
+    model = XceptionDeeplab("xception_65", **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld)
+    return model

+ 3 - 0
paddlers/models/ppcls/arch/backbone/variant_models/__init__.py

@@ -0,0 +1,3 @@
+from .resnet_variant import ResNet50_last_stage_stride1
+from .vgg_variant import VGG19Sigmoid
+from .pp_lcnet_variant import PPLCNet_x2_5_Tanh

+ 29 - 0
paddlers/models/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py

@@ -0,0 +1,29 @@
+import paddle
+from paddle.nn import Sigmoid
+from paddle.nn import Tanh
+from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x2_5
+
+__all__ = ["PPLCNet_x2_5_Tanh"]
+
+
+class TanhSuffix(paddle.nn.Layer):
+    def __init__(self, origin_layer):
+        super(TanhSuffix, self).__init__()
+        self.origin_layer = origin_layer
+        self.tanh = Tanh()
+
+    def forward(self, input, res_dict=None, **kwargs):
+        x = self.origin_layer(input)
+        x = self.tanh(x)
+        return x
+
+
+def PPLCNet_x2_5_Tanh(pretrained=False, use_ssld=False, **kwargs):
+    def replace_function(origin_layer, pattern):
+        new_layer = TanhSuffix(origin_layer)
+        return new_layer
+
+    pattern = "fc"
+    model = PPLCNet_x2_5(pretrained=pretrained, use_ssld=use_ssld, **kwargs)
+    model.upgrade_sublayer(pattern, replace_function)
+    return model
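
All three variant modules follow the same recipe: the legendary-model backbones derive from TheseusLayer, and upgrade_sublayer(pattern, fn) walks the network, calling fn(layer, pattern) on every sublayer whose name matches the pattern and swapping in the returned layer. A hedged sketch of the same idea with an arbitrary activation (ReLU6 is chosen purely for illustration):

import paddle
from paddle.nn import ReLU6
from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x2_5


class ReLU6Suffix(paddle.nn.Layer):
    def __init__(self, origin_layer):
        super().__init__()
        self.origin_layer = origin_layer
        self.act = ReLU6()

    def forward(self, input, res_dict=None, **kwargs):
        # run the wrapped layer, then the appended activation
        return self.act(self.origin_layer(input))


model = PPLCNet_x2_5(pretrained=False)
model.upgrade_sublayer("fc", lambda layer, pattern: ReLU6Suffix(layer))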

+ 23 - 0
paddlers/models/ppcls/arch/backbone/variant_models/resnet_variant.py

@@ -0,0 +1,23 @@
+from paddle.nn import Conv2D
+from ppcls.arch.backbone.legendary_models.resnet import ResNet50, MODEL_URLS, _load_pretrained
+
+__all__ = ["ResNet50_last_stage_stride1"]
+
+
+def ResNet50_last_stage_stride1(pretrained=False, use_ssld=False, **kwargs):
+    def replace_function(conv, pattern):
+        new_conv = Conv2D(
+            in_channels=conv._in_channels,
+            out_channels=conv._out_channels,
+            kernel_size=conv._kernel_size,
+            stride=1,
+            padding=conv._padding,
+            groups=conv._groups,
+            bias_attr=conv._bias_attr)
+        return new_conv
+
+    pattern = ["blocks[13].conv1.conv", "blocks[13].short.conv"]
+    model = ResNet50(pretrained=False, use_ssld=use_ssld, **kwargs)
+    model.upgrade_sublayer(pattern, replace_function)
+    _load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld)
+    return model

+ 28 - 0
paddlers/models/ppcls/arch/backbone/variant_models/vgg_variant.py

@@ -0,0 +1,28 @@
+import paddle
+from paddle.nn import Sigmoid
+from ppcls.arch.backbone.legendary_models.vgg import VGG19
+
+__all__ = ["VGG19Sigmoid"]
+
+
+class SigmoidSuffix(paddle.nn.Layer):
+    def __init__(self, origin_layer):
+        super().__init__()
+        self.origin_layer = origin_layer
+        self.sigmoid = Sigmoid()
+
+    def forward(self, input, res_dict=None, **kwargs):
+        x = self.origin_layer(input)
+        x = self.sigmoid(x)
+        return x
+
+
+def VGG19Sigmoid(pretrained=False, use_ssld=False, **kwargs):
+    def replace_function(origin_layer, pattern):
+        new_layer = SigmoidSuffix(origin_layer)
+        return new_layer
+
+    pattern = "fc2"
+    model = VGG19(pretrained=pretrained, use_ssld=use_ssld, **kwargs)
+    model.upgrade_sublayer(pattern, replace_function)
+    return model

+ 32 - 0
paddlers/models/ppcls/arch/gears/__init__.py

@@ -0,0 +1,32 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .arcmargin import ArcMargin
+from .cosmargin import CosMargin
+from .circlemargin import CircleMargin
+from .fc import FC
+from .vehicle_neck import VehicleNeck
+
+__all__ = ['build_gear']
+
+
+def build_gear(config):
+    support_dict = [
+        'ArcMargin', 'CosMargin', 'CircleMargin', 'FC', 'VehicleNeck'
+    ]
+    module_name = config.pop('name')
+    assert module_name in support_dict, \
+        'head only supports {}'.format(support_dict)
+    module_class = eval(module_name)(**config)
+    return module_class
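
Usage sketch: `name` is popped off the config dict and the remaining keys become constructor kwargs, so a head config like the following yields an ArcMargin instance (values illustrative):

config = {
    "name": "ArcMargin",     # consumed by config.pop("name")
    "embedding_size": 512,
    "class_num": 1000,
    "margin": 0.5,
    "scale": 80.0,
}
head = build_gear(config)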

+ 72 - 0
paddlers/models/ppcls/arch/gears/arcmargin.py

@@ -0,0 +1,72 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import math
+
+
+class ArcMargin(nn.Layer):
+    def __init__(self,
+                 embedding_size,
+                 class_num,
+                 margin=0.5,
+                 scale=80.0,
+                 easy_margin=False):
+        super().__init__()
+        self.embedding_size = embedding_size
+        self.class_num = class_num
+        self.margin = margin
+        self.scale = scale
+        self.easy_margin = easy_margin
+        self.weight = self.create_parameter(
+            shape=[self.embedding_size, self.class_num],
+            is_bias=False,
+            default_initializer=paddle.nn.initializer.XavierNormal())
+
+    def forward(self, input, label=None):
+        input_norm = paddle.sqrt(
+            paddle.sum(paddle.square(input), axis=1, keepdim=True))
+        input = paddle.divide(input, input_norm)
+
+        weight_norm = paddle.sqrt(
+            paddle.sum(paddle.square(self.weight), axis=0, keepdim=True))
+        weight = paddle.divide(self.weight, weight_norm)
+
+        cos = paddle.matmul(input, weight)
+        if not self.training or label is None:
+            return cos
+        sin = paddle.sqrt(1.0 - paddle.square(cos) + 1e-6)
+        cos_m = math.cos(self.margin)
+        sin_m = math.sin(self.margin)
+        phi = cos * cos_m - sin * sin_m
+
+        th = math.cos(self.margin) * (-1)
+        mm = math.sin(self.margin) * self.margin
+        if self.easy_margin:
+            phi = self._paddle_where_more_than(cos, 0, phi, cos)
+        else:
+            phi = self._paddle_where_more_than(cos, th, phi, cos - mm)
+
+        one_hot = paddle.nn.functional.one_hot(label, self.class_num)
+        one_hot = paddle.squeeze(one_hot, axis=[1])
+        output = paddle.multiply(one_hot, phi) + paddle.multiply(
+            (1.0 - one_hot), cos)
+        output = output * self.scale
+        return output
+
+    def _paddle_where_more_than(self, target, limit, x, y):
+        mask = paddle.cast(x=(target > limit), dtype='float32')
+        output = paddle.multiply(mask, x) + paddle.multiply((1.0 - mask), y)
+        return output
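
The margin arithmetic above is the ArcFace identity: with theta the angle between the L2-normalized feature and class weight, phi = cos(theta)*cos(m) - sin(theta)*sin(m) = cos(theta + m), applied only to the target class through the one-hot mask; `th` and `mm` implement the usual fallback cos(theta) - m*sin(m) once theta + m would pass pi. A shape-only smoke test (values illustrative):

import paddle

arc = ArcMargin(embedding_size=128, class_num=10, margin=0.5, scale=64.0)
feat = paddle.randn([4, 128])
label = paddle.randint(0, 10, [4, 1])
logits = arc(feat, label)   # train mode: margin applied to the target class
print(logits.shape)         # [4, 10]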

+ 59 - 0
paddlers/models/ppcls/arch/gears/circlemargin.py

@@ -0,0 +1,59 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+
+class CircleMargin(nn.Layer):
+    def __init__(self, embedding_size, class_num, margin, scale):
+        super(CircleMargin, self).__init__()
+        self.scale = scale
+        self.margin = margin
+        self.embedding_size = embedding_size
+        self.class_num = class_num
+
+        self.weight = self.create_parameter(
+            shape=[self.embedding_size, self.class_num],
+            is_bias=False,
+            default_initializer=paddle.nn.initializer.XavierNormal())
+
+    def forward(self, input, label):
+        feat_norm = paddle.sqrt(
+            paddle.sum(paddle.square(input), axis=1, keepdim=True))
+        input = paddle.divide(input, feat_norm)
+
+        weight_norm = paddle.sqrt(
+            paddle.sum(paddle.square(self.weight), axis=0, keepdim=True))
+        weight = paddle.divide(self.weight, weight_norm)
+
+        logits = paddle.matmul(input, weight)
+        if not self.training or label is None:
+            return logits
+
+        alpha_p = paddle.clip(-logits.detach() + 1 + self.margin, min=0.)
+        alpha_n = paddle.clip(logits.detach() + self.margin, min=0.)
+        delta_p = 1 - self.margin
+        delta_n = self.margin
+
+        m_hot = F.one_hot(label.reshape([-1]), num_classes=logits.shape[1])
+
+        logits_p = alpha_p * (logits - delta_p)
+        logits_n = alpha_n * (logits - delta_n)
+        pre_logits = logits_p * m_hot + logits_n * (1 - m_hot)
+        pre_logits = self.scale * pre_logits
+
+        return pre_logits
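
For reference, this is the classification form of Circle Loss (Sun et al., CVPR 2020): each logit is re-weighted by how far it sits from its optimum, so nearly-converged positives receive small gradients while hard negatives receive large ones. A tiny numeric illustration of the adaptive weights:

margin = 0.25                           # illustrative value
logit = 0.9                             # cosine similarity of some pair
alpha_p = max(1 + margin - logit, 0.)   # 0.35: well-optimized positive, small weight
alpha_n = max(logit + margin, 0.)       # 1.15: a negative at the same score is penalized hard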

+ 55 - 0
paddlers/models/ppcls/arch/gears/cosmargin.py

@@ -0,0 +1,55 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import math
+import paddle.nn as nn
+
+
+class CosMargin(paddle.nn.Layer):
+    def __init__(self, embedding_size, class_num, margin=0.35, scale=64.0):
+        super(CosMargin, self).__init__()
+        self.scale = scale
+        self.margin = margin
+        self.embedding_size = embedding_size
+        self.class_num = class_num
+
+        self.weight = self.create_parameter(
+            shape=[self.embedding_size, self.class_num],
+            is_bias=False,
+            default_initializer=paddle.nn.initializer.XavierNormal())
+
+    def forward(self, input, label):
+        label.stop_gradient = True
+
+        input_norm = paddle.sqrt(
+            paddle.sum(paddle.square(input), axis=1, keepdim=True))
+        input = paddle.divide(input, input_norm)
+
+        weight_norm = paddle.sqrt(
+            paddle.sum(paddle.square(self.weight), axis=0, keepdim=True))
+        weight = paddle.divide(self.weight, weight_norm)
+
+        cos = paddle.matmul(input, weight)
+        if not self.training or label is None:
+            return cos
+
+        cos_m = cos - self.margin
+
+        one_hot = paddle.nn.functional.one_hot(label, self.class_num)
+        one_hot = paddle.squeeze(one_hot, axis=[1])
+        output = paddle.multiply(one_hot, cos_m) + paddle.multiply(
+            (1.0 - one_hot), cos)
+        output = output * self.scale
+        return output

+ 35 - 0
paddlers/models/ppcls/arch/gears/fc.py

@@ -0,0 +1,35 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+
+
+class FC(nn.Layer):
+    def __init__(self, embedding_size, class_num):
+        super(FC, self).__init__()
+        self.embedding_size = embedding_size
+        self.class_num = class_num
+        weight_attr = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.XavierNormal())
+        self.fc = paddle.nn.Linear(
+            self.embedding_size, self.class_num, weight_attr=weight_attr)
+
+    def forward(self, input, label=None):
+        out = self.fc(input)
+        return out

+ 9 - 0
paddlers/models/ppcls/arch/gears/identity_head.py

@@ -0,0 +1,9 @@
+from paddle import nn
+
+
+class IdentityHead(nn.Layer):
+    def __init__(self):
+        super(IdentityHead, self).__init__()
+
+    def forward(self, x, label=None):
+        return {"features": x, "logits": None}

+ 52 - 0
paddlers/models/ppcls/arch/gears/vehicle_neck.py

@@ -0,0 +1,52 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import paddle
+import paddle.nn as nn
+
+
+class VehicleNeck(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 padding_mode='zeros',
+                 weight_attr=None,
+                 bias_attr=None,
+                 data_format='NCHW'):
+        super().__init__()
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            padding_mode=padding_mode,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            data_format=data_format)
+        self.flatten = nn.Flatten()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.flatten(x)
+        return x

+ 16 - 0
paddlers/models/ppcls/arch/slim/__init__.py

@@ -0,0 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ppcls.arch.slim.prune import prune_model
+from ppcls.arch.slim.quant import quantize_model

+ 65 - 0
paddlers/models/ppcls/arch/slim/prune.py

@@ -0,0 +1,65 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+import paddle
+from ppcls.utils import logger
+
+
+def prune_model(config, model):
+    if config.get("Slim", False) and config["Slim"].get("prune", False):
+        import paddleslim
+        prune_method_name = config["Slim"]["prune"]["name"].lower()
+        assert prune_method_name in [
+            "fpgm", "l1_norm"
+        ], "The prune methods only support 'fpgm' and 'l1_norm'"
+        if prune_method_name == "fpgm":
+            model.pruner = paddleslim.dygraph.FPGMFilterPruner(
+                model, [1] + config["Global"]["image_shape"])
+        else:
+            model.pruner = paddleslim.dygraph.L1NormFilterPruner(
+                model, [1] + config["Global"]["image_shape"])
+
+        # prune model
+        _prune_model(config, model)
+    else:
+        model.pruner = None
+
+
+def _prune_model(config, model):
+    from paddleslim.analysis import dygraph_flops as flops
+    logger.info("FLOPs before pruning: {}GFLOPs".format(
+        flops(model, [1] + config["Global"]["image_shape"]) / 1e9))
+    model.eval()
+
+    params = []
+    for sublayer in model.sublayers():
+        for param in sublayer.parameters(include_sublayers=False):
+            if isinstance(sublayer, paddle.nn.Conv2D):
+                params.append(param.name)
+    ratios = {}
+    for param in params:
+        ratios[param] = config["Slim"]["prune"]["pruned_ratio"]
+    plan = model.pruner.prune_vars(ratios, [0])
+
+    logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
+        flops(model, [1] + config["Global"]["image_shape"]) / 1e9,
+        plan.pruned_flops))
+
+    for param in model.parameters():
+        if "conv2d" in param.name:
+            logger.info("{}\t{}".format(param.name, param.shape))
+
+    model.train()
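
prune_model reads only a handful of keys; a minimal Slim section consistent with the lookups above might be (values illustrative):

Global:
  image_shape: [3, 224, 224]
Slim:
  prune:
    name: fpgm          # or l1_norm
    pruned_ratio: 0.3   # uniform ratio applied to every Conv2D parameter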

+ 55 - 0
paddlers/models/ppcls/arch/slim/quant.py

@@ -0,0 +1,55 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+import paddle
+from ppcls.utils import logger
+
+QUANT_CONFIG = {
+    # weight preprocess type, default is None and no preprocessing is performed.
+    'weight_preprocess_type': None,
+    # activation preprocess type, default is None and no preprocessing is performed.
+    'activation_preprocess_type': None,
+    # weight quantize type, default is 'channel_wise_abs_max'
+    'weight_quantize_type': 'channel_wise_abs_max',
+    # activation quantize type, default is 'moving_average_abs_max'
+    'activation_quantize_type': 'moving_average_abs_max',
+    # weight quantize bit num, default is 8
+    'weight_bits': 8,
+    # activation quantize bit num, default is 8
+    'activation_bits': 8,
+    # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
+    'dtype': 'int8',
+    # window size for 'range_abs_max' quantization. default is 10000
+    'window_size': 10000,
+    # The decay coefficient of moving average, default is 0.9
+    'moving_rate': 0.9,
+    # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
+    'quantizable_layer_type': ['Conv2D', 'Linear'],
+}
+
+
+def quantize_model(config, model):
+    if config.get("Slim", False) and config["Slim"].get("quant", False):
+        from paddleslim.dygraph.quant import QAT
+        assert config["Slim"]["quant"]["name"].lower(
+        ) == 'pact', 'Only PACT quantization method is supported now'
+        QUANT_CONFIG["activation_preprocess_type"] = "PACT"
+        model.quanter = QAT(config=QUANT_CONFIG)
+        model.quanter.quantize(model)
+        logger.info("QAT model summary:")
+        paddle.summary(model, (1, 3, 224, 224))
+    else:
+        model.quanter = None
+    return
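
quantize_model follows the same switch; only PACT is accepted, which sets activation_preprocess_type to "PACT" before QAT wraps the quantizable Conv2D/Linear layers. A matching sketch:

Slim:
  quant:
    name: pact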

+ 53 - 0
paddlers/models/ppcls/arch/utils.py

@@ -0,0 +1,53 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import six
+import types
+from difflib import SequenceMatcher
+
+from . import backbone
+
+
+def get_architectures():
+    """
+    get the names of all model architectures
+    """
+    names = []
+    for k, v in backbone.__dict__.items():
+        if isinstance(v, (types.FunctionType, six.class_types)):
+            names.append(k)
+    return names
+
+
+def get_blacklist_model_in_static_mode():
+    from ppcls.arch.backbone import distilled_vision_transformer
+    from ppcls.arch.backbone import vision_transformer
+    blacklist = distilled_vision_transformer.__all__ + vision_transformer.__all__
+    return blacklist
+
+
+def similar_architectures(name='', names=None, thresh=0.1, topk=10):
+    """
+    infer architectures whose names are similar to `name`
+    """
+    names = names if names is not None else []
+    scores = []
+    for idx, n in enumerate(names):
+        if n.startswith('__'):
+            continue
+        score = SequenceMatcher(None, n.lower(), name.lower()).quick_ratio()
+        if score > thresh:
+            scores.append((idx, score))
+    scores.sort(key=lambda x: x[1], reverse=True)
+    similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
+    return similar_names
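
A typical use pairs it with get_architectures above to suggest fixes for a misspelled model name (output illustrative; it depends on the registered backbones):

names = get_architectures()
print(similar_architectures('ResNet_50', names, thresh=0.1, topk=5))
# e.g. ['ResNet50', 'ResNet50_vd', 'ResNet50_vc', ...]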

+ 149 - 0
paddlers/models/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml

@@ -0,0 +1,149 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: "./output/"
+  device: "gpu"
+  save_interval: 1
+  eval_mode: "retrieval"
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  image_shape: [3, 224, 224]
+  infer_imgs:
+  save_inference_dir: "./inference"
+  feature_normalize: True
+
+Arch:  
+  name: "RecModel"
+  Backbone:
+    name: "ResNet50"
+    pretrained: True
+  BackboneStopLayer: 
+    name: "flatten"
+    output_dim: 2048
+  Head:
+    name: "FC"
+    class_num: 5013
+    embedding_size: 2048
+    # margin: 0.5
+    # scale:  80
+  infer_output_key: "features"
+  infer_add_softmax: False
+
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+    # - TripletLoss:  
+    #     margin: 0.1
+    #     weight: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+DataLoader:
+  Train:
+    dataset:
+      name: ICartoonDataset
+      image_root:  "./dataset/iCartoonFace"
+      cls_label_path:  "./dataset/iCartoonFace/train_list.txt"
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 0.00392157
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+        name: DistributedBatchSampler
+        #num_instances: 2
+        batch_size: 256
+        drop_last: False
+        shuffle: True
+    loader:
+        num_workers: 6
+        use_shared_memory: True
+  
+  Eval:
+    Query:
+      dataset: 
+        name: ICartoonDataset
+        image_root: "./dataset/iCartoonFace"
+        cls_label_path: "./dataset/iCartoonFace/query.txt"
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              resize_short: 256
+          - CropImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+          name: DistributedBatchSampler
+          batch_size: 64
+          drop_last: False
+          shuffle: False
+      loader:
+          num_workers: 8
+          use_shared_memory: True
+
+    Gallery:
+      dataset: 
+          name: ICartoonDataset
+          image_root: "./dataset/iCartoonFace"
+          cls_label_path: "./dataset/iCartoonFace/gallery.txt"
+          transform_ops:
+            - DecodeImage:
+                to_rgb: True
+                channel_first: False
+            - ResizeImage:
+                resize_short: 256
+            - CropImage:
+                size: 224
+            - NormalizeImage:
+                scale: 0.00392157
+                mean: [0.485, 0.456, 0.406]
+                std: [0.229, 0.224, 0.225]
+                order: ''
+      sampler:
+          name: DistributedBatchSampler
+          batch_size: 64
+          drop_last: False
+          shuffle: False
+      loader:
+          num_workers: 8
+          use_shared_memory: True
+
+Metric:
+    Train:
+    - TopkAcc:
+        topk: [1, 5]
+    Eval:
+    - Recallk:
+        topk: [1]

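Note: configs like the one above are plain YAML, so they can be inspected or patched outside the ppcls engine as well. A minimal sketch with PyYAML (the engine's own launchers also accept command-line overrides):

    import yaml

    with open("paddlers/models/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml") as f:
        cfg = yaml.safe_load(f)

    cfg["Global"]["epochs"] = 10  # shorten for a smoke test
    cfg["DataLoader"]["Train"]["sampler"]["batch_size"] = 64
    print(cfg["Arch"]["Backbone"]["name"])  # -> ResNet50
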
+ 148 - 0
paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml

@@ -0,0 +1,148 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 100
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+  eval_mode: retrieval
+  use_dali: False
+  to_static: False
+
+# model architecture
+Arch:
+  name: RecModel
+  infer_output_key: features
+  infer_add_softmax: False
+
+  Backbone: 
+    name: PPLCNet_x2_5
+    pretrained: True
+    use_ssld: True
+  BackboneStopLayer:
+    name: "flatten"
+  Neck:
+    name: FC
+    embedding_size: 1280
+    class_num: 512
+  Head:
+    name: ArcMargin 
+    embedding_size: 512
+    class_num: 185341
+    margin: 0.2
+    scale: 30
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.04
+    warmup_epoch: 5
+  regularizer:
+    name: 'L2'
+    coeff: 0.00001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/
+      cls_label_path: ./dataset/train_reg_all_data.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 256
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    Query:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]

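Note: the ArcMargin head above scores classes with an additive angular margin (ArcFace-style). A numpy sketch of the core computation under this config's margin=0.2 and scale=30 (the real head is a paddle Layer; this is illustration only):

    import numpy as np

    def arc_margin_logits(feat, weight, label, margin=0.2, scale=30.0):
        # L2-normalize features (N, D) and class weights (D, C)
        feat = feat / np.linalg.norm(feat, axis=1, keepdims=True)
        weight = weight / np.linalg.norm(weight, axis=0, keepdims=True)
        cos = feat @ weight                      # cosine similarity per class
        theta = np.arccos(np.clip(cos, -1.0, 1.0))
        target = np.cos(theta + margin)          # margin added on the true-class angle
        onehot = np.eye(weight.shape[1])[label]  # (N, C), 1 at the true class
        return scale * (onehot * target + (1.0 - onehot) * cos)
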
+ 145 - 0
paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_binary.yaml

@@ -0,0 +1,145 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 100
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+  eval_mode: retrieval
+  use_dali: False
+  to_static: False
+
+  #feature postprocess
+  feature_normalize: False
+  feature_binarize: "sign"
+
+# model architecture
+Arch:
+  name: RecModel
+  infer_output_key: features
+  infer_add_softmax: False
+
+  Backbone:
+    name: PPLCNet_x2_5_Tanh
+    pretrained: True
+    use_ssld: True
+    class_num: 512
+  Head:
+    name: FC
+    embedding_size: 512
+    class_num: 185341
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.04
+    warmup_epoch: 5
+  regularizer:
+    name: 'L2'
+    coeff: 0.00001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/all_data
+      cls_label_path: ./dataset/all_data/train_reg_all_data.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 256
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    Query:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]

+ 188 - 0
paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml

@@ -0,0 +1,188 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 100
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+  eval_mode: retrieval
+  use_dali: False
+  to_static: False
+
+# model architecture
+Arch:
+  name: "DistillationModel"
+  infer_output_key: features
+  infer_add_softmax: False
+  is_rec: True
+  infer_model_name: "Student"
+  # if not null, its length should match the number of models
+  pretrained_list:
+  # if not null, its length should match the number of models
+  freeze_params_list:
+  - False
+  - False
+  models:
+    - Teacher:
+        name: RecModel
+        infer_output_key: features
+        infer_add_softmax: False
+        Backbone: 
+          name: PPLCNet_x2_5
+          pretrained: True
+          use_ssld: True
+        BackboneStopLayer:
+          name: "flatten"
+        Neck:
+          name: FC
+          embedding_size: 1280
+          class_num: 512
+        Head:
+          name: ArcMargin 
+          embedding_size: 512
+          class_num: 185341
+          margin: 0.2
+          scale: 30
+    - Student:
+        name: RecModel
+        infer_output_key: features
+        infer_add_softmax: False
+        Backbone: 
+          name: PPLCNet_x2_5
+          pretrained: True
+          use_ssld: True
+        BackboneStopLayer:
+          name: "flatten"
+        Neck:
+          name: FC
+          embedding_size: 1280
+          class_num: 512
+        Head:
+          name: ArcMargin 
+          embedding_size: 512
+          class_num: 185341
+          margin: 0.2
+          scale: 30
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - DistillationGTCELoss:
+        weight: 1.0
+        key: "logits"
+        model_names: ["Student", "Teacher"]
+    - DistillationDMLLoss:
+        weight: 1.0
+        key: "logits"
+        model_name_pairs:
+        - ["Student", "Teacher"]
+  Eval:
+    - DistillationGTCELoss:
+        weight: 1.0
+        model_names: ["Student"]
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.02
+    warmup_epoch: 5
+  regularizer:
+    name: 'L2'
+    coeff: 0.00001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/
+      cls_label_path: ./dataset/train_reg_all_data.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    Query:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]

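Note: the DistillationDMLLoss configured above trains both peers against each other with a symmetric KL term on their predicted distributions. A paddle sketch of that term (an assumption about the loss internals, not a verbatim copy):

    import paddle.nn.functional as F

    def dml_loss(logits_s, logits_t):
        # symmetric KL between the two models' predicted distributions
        kl_a = F.kl_div(F.log_softmax(logits_s, axis=-1),
                        F.softmax(logits_t, axis=-1), reduction='batchmean')
        kl_b = F.kl_div(F.log_softmax(logits_t, axis=-1),
                        F.softmax(logits_s, axis=-1), reduction='batchmean')
        return (kl_a + kl_b) / 2
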
+ 193 - 0
paddlers/models/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_udml.yaml

@@ -0,0 +1,193 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 100
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+  eval_mode: retrieval
+  use_dali: False
+  to_static: False
+
+# model architecture
+Arch:
+  name: "DistillationModel"
+  infer_output_key: features
+  infer_add_softmax: False
+  is_rec: True
+  infer_model_name: "Student"
+  # if not null, its length should match the number of models
+  pretrained_list:
+  # if not null, its length should match the number of models
+  freeze_params_list:
+  - False
+  - False
+  models:
+    - Teacher:
+        name: RecModel
+        infer_output_key: features
+        infer_add_softmax: False
+        Backbone: 
+          name: PPLCNet_x2_5
+          pretrained: True
+          use_ssld: True
+        BackboneStopLayer:
+          name: "flatten"
+        Neck:
+          name: FC
+          embedding_size: 1280
+          class_num: 512
+        Head:
+          name: ArcMargin 
+          embedding_size: 512
+          class_num: 185341
+          margin: 0.2
+          scale: 30
+    - Student:
+        name: RecModel
+        infer_output_key: features
+        infer_add_softmax: False
+        Backbone: 
+          name: PPLCNet_x2_5
+          pretrained: True
+          use_ssld: True
+        BackboneStopLayer:
+          name: "flatten"
+        Neck:
+          name: FC
+          embedding_size: 1280
+          class_num: 512
+        Head:
+          name: ArcMargin 
+          embedding_size: 512
+          class_num: 185341
+          margin: 0.2
+          scale: 30
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - DistillationGTCELoss:
+        weight: 1.0
+        key: "logits"
+        model_names: ["Student", "Teacher"]
+    - DistillationDMLLoss:
+        weight: 1.0
+        key: "logits"
+        model_name_pairs:
+        - ["Student", "Teacher"]
+    - DistillationDistanceLoss:
+        weight: 1.0
+        key: "backbone"
+        model_name_pairs:
+        - ["Student", "Teacher"]
+  Eval:
+    - DistillationGTCELoss:
+        weight: 1.0
+        model_names: ["Student"]
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.02
+    warmup_epoch: 5
+  regularizer:
+    name: 'L2'
+    coeff: 0.00001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/
+      cls_label_path: ./dataset/train_reg_all_data.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    Query:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset: 
+        name: VeriWild
+        image_root: ./dataset/Aliproduct/
+        cls_label_path: ./dataset/Aliproduct/val_list.txt
+        transform_ops:
+          - DecodeImage:
+              to_rgb: True
+              channel_first: False
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]

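Note: relative to the DML config above it, this UDML variant adds a DistillationDistanceLoss on the "backbone" key, i.e. a feature-level distance between the two peers. A minimal sketch, assuming the distance used is mean-squared error:

    import paddle.nn.functional as F

    def feature_distance_loss(feat_s, feat_t):
        # pull the Student's backbone features toward the Teacher's (and vice versa)
        return F.mse_loss(feat_s, feat_t)
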
+ 129 - 0
paddlers/models/ppcls/configs/ImageNet/AlexNet/AlexNet.yaml

@@ -0,0 +1,129 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: AlexNet
+  class_num: 1000
+ 
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    decay_epochs: [30, 60, 90]
+    values: [0.01, 0.001, 0.0001, 0.00001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

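Note: the Infer/PostProcess block above turns raw logits into the top-5 (class_id, score, label) triples via the label map file. A numpy sketch of that Topk step (class_id_map here stands for a dict parsed from imagenet1k_label_list.txt):

    import numpy as np

    def topk_postprocess(logits, class_id_map, k=5):
        e = np.exp(logits - logits.max())        # numerically stable softmax
        probs = e / e.sum()
        ids = probs.argsort()[::-1][:k]          # k highest-probability classes
        return [(int(i), float(probs[i]), class_id_map.get(int(i), "")) for i in ids]
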
+ 131 - 0
paddlers/models/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml

@@ -0,0 +1,131 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 256, 256]
+  save_inference_dir: ./inference
+  # whether to train the model under @to_static
+  to_static: False
+
+# model architecture
+Arch:
+  name: CSPDarkNet53
+  class_num: 1000
+ 
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 256
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 288
+        - CropImage:
+            size: 256
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 288
+    - CropImage:
+        size: 256
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA102.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA102
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA102x.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA102x
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA102x2.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA102x2
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA169.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA169
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA34.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA34
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA46_c.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA46_c
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA46x_c.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA46x_c
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA60.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA60
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA60x.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA60x
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DLA/DLA60x_c.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 120
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DLA60x_c
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    learning_rate: 0.1
+    decay_epochs: [30, 60, 90]
+    values: [0.1, 0.01, 0.001, 0.0001]
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DPN/DPN107.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DPN107
+  class_num: 1000
+ 
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

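One asymmetry worth noting in DPN107.yaml (and repeated in the DPN and DarkNet configs below): the training CELoss sets epsilon: 0.1 while the eval CELoss does not, i.e. label smoothing is applied to the training targets only. Under the usual smoothing rule with K = 1000 classes,

    y_smooth = (1 - 0.1) * y_onehot + 0.1 / 1000

the true class receives weight 0.9001 and every other class 0.0001.
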
+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DPN/DPN131.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DPN131
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

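The batch_transform_ops entry MixupOperator with alpha: 0.2 mixes sample pairs within each training batch. In the standard mixup formulation this draws lambda ~ Beta(0.2, 0.2) and forms

    x = lambda * x_i + (1 - lambda) * x_j
    y = lambda * y_i + (1 - lambda) * y_j

With alpha as small as 0.2 the Beta distribution concentrates near 0 and 1, so most mixed images stay close to one of the two originals.
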
+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DPN/DPN68.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DPN68
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

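All of these configs pair Momentum (momentum: 0.9) with a Cosine schedule at learning_rate: 0.1. Assuming the usual cosine annealing without warmup, the rate at step t of T total steps is

    lr(t) = 0.5 * 0.1 * (1 + cos(pi * t / T))

falling smoothly from 0.1 at the start to 0 at the end of the epochs: 200 budget.
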
+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DPN/DPN92.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DPN92
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

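The regularizer block (name: 'L2', coeff: 0.0001) adds standard weight decay; in the conventional formulation the optimized objective becomes

    L_total = L_CE + 0.0001 * sum_w w^2

(possibly with a factor of 1/2, depending on the framework's convention).
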
+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DPN/DPN98.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DPN98
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

+ 130 - 0
paddlers/models/ppcls/configs/ImageNet/DarkNet/DarkNet53.yaml

@@ -0,0 +1,130 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 200
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 256, 256]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: DarkNet53
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 256
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 292
+        - CropImage:
+            size: 256
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 292
+    - CropImage:
+        size: 256
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

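DarkNet53 is the one config in this batch trained at 256 x 256 (image_shape: [3, 256, 256]), and its eval/infer pipelines resize the short side to 292 rather than 256. That preserves the usual center-crop ratio: the 224-px models use 224 / 256 = 0.875, and 256 / 0.875 ≈ 292.6, truncated to 292, keeps the same ratio for the larger input.
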
+ 129 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_AutoAugment.yaml

@@ -0,0 +1,129 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - AutoAugment:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

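AutoAugment is listed with no parameters, which suggests the op applies a fixed published policy (presumably the ImageNet policy from the AutoAugment paper) rather than one searched per dataset. Its position matters: it sits after RandFlipImage and before NormalizeImage, so it operates on the raw uint8 HWC image, where the policy's color and geometric sub-operations are defined.
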
+ 128 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Baseline.yaml

@@ -0,0 +1,128 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

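ResNet50_Baseline.yaml is the control for the DataAugment family: the variants around it differ only by a single extra entry in transform_ops or batch_transform_ops. Note also that all DataAugment configs train for epochs: 300, versus the 200 used by the DPN and DarkNet configs above, presumably because the stronger regularization pays off over a longer schedule.
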
+ 128 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutmix.yaml

@@ -0,0 +1,128 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - CutmixOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

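CutmixOperator with alpha: 0.2 draws lambda from the same Beta(0.2, 0.2) as mixup, but in the standard CutMix formulation it pastes a rectangular patch from one image into another instead of blending pixels: the patch covers a fraction 1 - lambda of the image area, and the labels mix as

    y = lambda * y_i + (1 - lambda) * y_j
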
+ 131 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutout.yaml

@@ -0,0 +1,131 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - Cutout:
+            n_holes: 1
+            length: 112
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

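The Cutout op here zeroes n_holes: 1 square patch of length: 112 per image. Against the 224 x 224 training crop that is 112^2 / 224^2 = 25% of the image area per hole, and less in the typical implementation when the randomly placed square is clipped at the border.
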
+ 134 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_GridMask.yaml

@@ -0,0 +1,134 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - GridMask:
+            d1: 96
+            d2: 224
+            rotate: 1
+            ratio: 0.5
+            mode: 0
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

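The GridMask parameters track the ranges from the GridMask paper: the grid period is sampled between d1: 96 and d2: 224 pixels, ratio: 0.5 keeps roughly half of each grid cell visible, rotate: 1 permits rotating the mask, and mode: 0 selects dropping (rather than keeping) the masked cells. This reading follows the paper; the exact semantics depend on the ppcls implementation.
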
+ 129 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_HideAndSeek.yaml

@@ -0,0 +1,129 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - HideAndSeek:
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

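HideAndSeek, like AutoAugment, takes no arguments here. In the original Hide-and-Seek method the image is split into a fixed grid and each cell is hidden independently with some probability, so the op presumably carries those defaults internally.
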
+ 128 - 0
paddlers/models/ppcls/configs/ImageNet/DataAugment/ResNet50_Mixup.yaml

@@ -0,0 +1,128 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# model architecture
+Arch:
+  name: ResNet50
+  class_num: 1000
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0001
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+      batch_transform_ops:
+        - MixupOperator:
+            alpha: 0.2
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]

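A final pattern across this batch: the configs that use MixupOperator or CutmixOperator leave Metric.Train empty (the DPN family) or omit it entirely (the ResNet50 mixup/cutmix variants), while every other config also reports TopkAcc during training. That is consistent with the batch transforms producing soft mixed labels, against which hard-label top-k accuracy would not be meaningful.
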
Some files were not shown because of the large amount of changes