Jelajahi Sumber

Rename and polish doc/comments

Bobholamovic 2 tahun lalu
induk
melakukan
5a6d4b1fc6

+ 5 - 6
README.md

@@ -92,7 +92,7 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
           <li>ResizeByShort</li>
           <li>RandomResizeByShort</li>
           <li>ResizeByLong</li>  
-          <li>RandomFlipOrRotation</li> 
+          <li>RandomFlipOrRotate</li>
           <li>RandomHorizontalFlip</li>  
           <li>RandomVerticalFlip</li>
           <li>Normalize</li>
@@ -100,13 +100,13 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
           <li>RandomCrop</li>
           <li>RandomScaleAspect</li>  
           <li>RandomExpand</li>
-          <li>Padding</li>
+          <li>Pad</li>
           <li>MixupImage</li>  
           <li>RandomDistort</li>  
           <li>RandomBlur</li>  
-          <li>Defogging</li>  
-          <li>DimReducing</li>  
-          <li>BandSelecting</li>  
+          <li>Dehaze</li>  
+          <li>ReduceDim</li>  
+          <li>SelectBand</li>  
           <li>RandomSwap</li>
         </ul>  
       </td>
@@ -223,4 +223,3 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
     year={2022}
 }
 ```
-

+ 5 - 5
docs/apis/transforms.md

@@ -1,6 +1,6 @@
 # 数据增强
 
-PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Compose`进行使用,数据读取方面通过`ImgDecoder`可以对不只三通道RGB图像进行读取,还可以对SAR以及多通道图像进行读取,提供有转为`uint8`的选项。此外提供以下数据增强的方法。
+PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Compose`进行使用,数据读取方面通过`DecodeImg`可以对不只三通道RGB图像进行读取,还可以对SAR以及多通道图像进行读取,提供有转为`uint8`的选项。此外提供以下数据增强的方法。
 
 | 数据增强名称         | 用途                                            | 任务     | ...  |
 | -------------------- | ----------------------------------------------- | -------- | ---- |
@@ -16,13 +16,13 @@ PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Co
 | RandomCrop           | 对输入进行随机中心裁剪                          | 所有     | ...  |
 | RandomScaleAspect    | 裁剪输入并重新调整大小至原始大小                | 所有     | ...  |
 | RandomExpand         | 通过根据随机偏移填充来随机扩展输入              | 所有     | ...  |
-| Padding              | 将输入填充到指定的大小                          | 所有     | ...  |
+| Pad              | 将输入填充到指定的大小                          | 所有     | ...  |
 | MixupImage           | 将两张图片和它们的`gt_bbbox/gt_score`混合在一起 | 目标检测 | ...  |
 | RandomDistort        | 对输入进行随机色彩变换                          | 所有     | ...  |
 | RandomBlur           | 对输入进行随机模糊                              | 所有     | ...  |
-| Defogging            | 对输入图像进行去雾                              | 所有     | ...  |
-| DimReducing          | 对输入图像进行降维                              | 所有     | ...  |
-| BandSelecting        | 选择输入图像的波段                              | 所有     | ...  |
+| Dehaze            | 对输入图像进行去雾                              | 所有     | ...  |
+| ReduceDim          | 对输入图像进行降维                              | 所有     | ...  |
+| SelectBand        | 选择输入图像的波段                              | 所有     | ...  |
 | RandomSwap           | 随机交换两个输入图像                            | 变化检测 | ...  |
 | ...                  | ...                                             |          | ...  |
 

+ 1 - 0
paddlers/datasets/cd_dataset.py

@@ -17,6 +17,7 @@ from enum import IntEnum
 import os.path as osp
 
 from paddle.io import Dataset
+
 from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
 
 

+ 1 - 0
paddlers/datasets/clas_dataset.py

@@ -16,6 +16,7 @@ import os.path as osp
 import copy
 
 from paddle.io import Dataset
+
 from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
 
 

+ 3 - 3
paddlers/datasets/coco.py

@@ -23,7 +23,7 @@ import numpy as np
 from paddle.io import Dataset
 
 from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
-from paddlers.transforms import ImgDecoder, MixupImage
+from paddlers.transforms import DecodeImg, MixupImage
 from paddlers.tools import YOLOAnchorCluster
 
 
@@ -256,8 +256,8 @@ class COCODetection(Dataset):
             if self.data_fields is not None:
                 sample_mix = {k: sample_mix[k] for k in self.data_fields}
             sample = self.mixup_op(sample=[
-                ImgDecoder(to_rgb=False)(sample),
-                ImgDecoder(to_rgb=False)(sample_mix)
+                DecodeImg(to_rgb=False)(sample),
+                DecodeImg(to_rgb=False)(sample_mix)
             ])
         sample = self.transforms(sample)
         return sample

+ 3 - 3
paddlers/datasets/voc.py

@@ -25,7 +25,7 @@ import numpy as np
 from paddle.io import Dataset
 
 from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
-from paddlers.transforms import ImgDecoder, MixupImage
+from paddlers.transforms import DecodeImg, MixupImage
 from paddlers.tools import YOLOAnchorCluster
 
 
@@ -320,8 +320,8 @@ class VOCDetection(Dataset):
             if self.data_fields is not None:
                 sample_mix = {k: sample_mix[k] for k in self.data_fields}
             sample = self.mixup_op(sample=[
-                ImgDecoder(to_rgb=False)(sample),
-                ImgDecoder(to_rgb=False)(sample_mix)
+                DecodeImg(to_rgb=False)(sample),
+                DecodeImg(to_rgb=False)(sample_mix)
             ])
         sample = self.transforms(sample)
         return sample

+ 2 - 2
paddlers/models/ppseg/core/infer.py

@@ -64,10 +64,10 @@ def get_reverse_list(ori_shape, transforms):
             else:
                 w = long_edge
                 h = short_edge
-        if op.__class__.__name__ in ['Padding']:
+        if op.__class__.__name__ in ['Pad']:
             reverse_list.append(('padding', (h, w)))
             w, h = op.target_size[0], op.target_size[1]
-        if op.__class__.__name__ in ['PaddingByAspectRatio']:
+        if op.__class__.__name__ in ['PadByAspectRatio']:
             reverse_list.append(('padding', (h, w)))
             ratio = w / h
             if ratio == op.aspect_ratio:

+ 1 - 1
paddlers/models/ppseg/datasets/dataset.py

@@ -49,7 +49,7 @@ class Dataset(paddle.io.Dataset):
             import paddlers.models.ppseg.transforms as T
             from paddlers.models.ppseg.datasets import Dataset
 
-            transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
+            transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
             dataset_root = 'dataset_root_path'
             train_path = 'train_path'
             num_classes = 2

+ 7 - 8
paddlers/models/ppseg/transforms/transforms.py

@@ -480,7 +480,7 @@ class Normalize:
 
 
 @manager.TRANSFORMS.add_component
-class Padding:
+class Pad:
     """
     Add bottom-right padding to a raw image or annotation image.
 
@@ -560,7 +560,7 @@ class Padding:
 
 
 @manager.TRANSFORMS.add_component
-class PaddingByAspectRatio:
+class PadByAspectRatio:
     """
 
     Args:
@@ -597,15 +597,14 @@ class PaddingByAspectRatio:
             img_height = int(img_width / self.aspect_ratio)
         else:
             img_width = int(img_height * self.aspect_ratio)
-        padding = Padding(
-            (img_width, img_height),
-            im_padding_value=self.im_padding_value,
-            label_padding_value=self.label_padding_value)
+        padding = Pad((img_width, img_height),
+                      im_padding_value=self.im_padding_value,
+                      label_padding_value=self.label_padding_value)
         return padding(im, label)
 
 
 @manager.TRANSFORMS.add_component
-class RandomPaddingCrop:
+class RandomPadCrop:
     """
     Crop a sub-image from a raw image and annotation image randomly. If the target cropping size
     is larger than original image, then the bottom-right padding will be added.
@@ -768,7 +767,7 @@ class RandomCenterCrop:
 
 
 @manager.TRANSFORMS.add_component
-class ScalePadding:
+class ScalePad:
     """
         Add center padding to a raw image or annotation image,then scale the
         image to target size.

+ 24 - 12
paddlers/tasks/change_detector.py

@@ -29,7 +29,7 @@ import paddlers.custom_models.cd as cmcd
 import paddlers.utils.logging as logging
 import paddlers.models.ppseg as paddleseg
 from paddlers.transforms import arrange_transforms
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import DecodeImg, Resize
 from paddlers.utils import get_single_card_bs, DisablePrint
 from paddlers.utils.checkpoint import seg_pretrain_weights_dict
 from .base import BaseModel
@@ -546,7 +546,12 @@ class BaseChangeDetector(BaseModel):
             }
         return prediction
 
-    def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None):
+    def slider_predict(self,
+                       img_file,
+                       save_dir,
+                       block_size,
+                       overlap=36,
+                       transforms=None):
         """
         Do inference.
         Args:
@@ -566,7 +571,7 @@ class BaseChangeDetector(BaseModel):
             from osgeo import gdal
         except:
             import gdal
-        
+
         if len(img_file) != 2:
             raise ValueError("`img_file` must be a list of length 2.")
         if isinstance(block_size, int):
@@ -574,13 +579,15 @@ class BaseChangeDetector(BaseModel):
         elif isinstance(block_size, (tuple, list)) and len(block_size) == 2:
             block_size = tuple(block_size)
         else:
-            raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.")
+            raise ValueError(
+                "`block_size` must be a tuple/list of length 2 or an integer.")
         if isinstance(overlap, int):
             overlap = (overlap, overlap)
         elif isinstance(overlap, (tuple, list)) and len(overlap) == 2:
             overlap = tuple(overlap)
         else:
-            raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.")
+            raise ValueError(
+                "`overlap` must be a tuple/list of length 2 or an integer.")
 
         src1_data = gdal.Open(img_file[0])
         src2_data = gdal.Open(img_file[1])
@@ -589,7 +596,8 @@ class BaseChangeDetector(BaseModel):
         bands = src1_data.RasterCount
 
         driver = gdal.GetDriverByName("GTiff")
-        file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[0] + ".tif"
+        file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[
+            0] + ".tif"
         if not osp.exists(save_dir):
             os.makedirs(save_dir)
         save_file = osp.join(save_dir, file_name)
@@ -607,17 +615,21 @@ class BaseChangeDetector(BaseModel):
                     xsize = int(width - xoff)
                 if yoff + ysize > height:
                     ysize = int(height - yoff)
-                im1 = src1_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
-                im2 = src2_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+                im1 = src1_data.ReadAsArray(
+                    int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+                im2 = src2_data.ReadAsArray(
+                    int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
                 # fill
                 h, w = im1.shape[:2]
-                im1_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im1.dtype)
+                im1_fill = np.zeros(
+                    (block_size[1], block_size[0], bands), dtype=im1.dtype)
                 im2_fill = im1_fill.copy()
                 im1_fill[:h, :w, :] = im1
                 im2_fill[:h, :w, :] = im2
                 im_fill = (im1_fill, im2_fill)
                 # predict
-                pred = self.predict(im_fill, transforms)["label_map"].astype("uint8")
+                pred = self.predict(im_fill,
+                                    transforms)["label_map"].astype("uint8")
                 # overlap
                 rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize)
                 mask = (rd_block == pred[:h, :w]) | (rd_block == 255)
@@ -637,7 +649,7 @@ class BaseChangeDetector(BaseModel):
             sample = {'image_t1': im1, 'image_t2': im2}
             if isinstance(sample['image_t1'], str) or \
                 isinstance(sample['image_t2'], str):
-                sample = ImgDecoder(to_rgb=False)(sample)
+                sample = DecodeImg(to_rgb=False)(sample)
                 ori_shape = sample['image'].shape[:2]
             else:
                 ori_shape = im1.shape[:2]
@@ -679,7 +691,7 @@ class BaseChangeDetector(BaseModel):
                     scale = float(op.long_size) / float(im_long_size)
                     h = int(round(h * scale))
                     w = int(round(w * scale))
-                elif op.__class__.__name__ == 'Padding':
+                elif op.__class__.__name__ == 'Pad':
                     if op.target_size:
                         target_h, target_w = op.target_size
                     else:

+ 3 - 3
paddlers/tasks/classifier.py

@@ -33,7 +33,7 @@ from paddlers.models.ppcls.metric import build_metrics
 from paddlers.models.ppcls.loss import build_loss
 from paddlers.models.ppcls.data.postprocess import build_postprocess
 from paddlers.utils.checkpoint import cls_pretrain_weights_dict
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import DecodeImg, Resize
 
 __all__ = [
     "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
@@ -467,7 +467,7 @@ class BaseClassifier(BaseModel):
         for im in images:
             sample = {'image': im}
             if isinstance(sample['image'], str):
-                sample = ImgDecoder(to_rgb=False)(sample)
+                sample = DecodeImg(to_rgb=False)(sample)
             ori_shape = sample['image'].shape[:2]
             im = transforms(sample)
             batch_im.append(im)
@@ -504,7 +504,7 @@ class BaseClassifier(BaseModel):
                     scale = float(op.long_size) / float(im_long_size)
                     h = int(round(h * scale))
                     w = int(round(w * scale))
-                elif op.__class__.__name__ == 'Padding':
+                elif op.__class__.__name__ == 'Pad':
                     if op.target_size:
                         target_h, target_w = op.target_size
                     else:

+ 13 - 13
paddlers/tasks/object_detector.py

@@ -27,9 +27,9 @@ import paddlers.models.ppdet as ppdet
 from paddlers.models.ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner
 import paddlers
 import paddlers.utils.logging as logging
-from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
+from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Pad
 from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \
-    _BatchPadding, _Gt2YoloTarget
+    _BatchPad, _Gt2YoloTarget
 from paddlers.transforms import arrange_transforms
 from .base import BaseModel
 from .utils.det_metrics import VOCMetric, COCOMetric
@@ -757,7 +757,7 @@ class PicoDet(BaseDetector):
             model_name='PicoDet', num_classes=num_classes, **params)
 
     def _compose_batch_transform(self, transforms, mode='train'):
-        default_batch_transforms = [_BatchPadding(pad_to_stride=32)]
+        default_batch_transforms = [_BatchPad(pad_to_stride=32)]
         if mode == 'eval':
             collate_batch = True
         else:
@@ -1005,7 +1005,7 @@ class YOLOv3(BaseDetector):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
-                _BatchPadding(pad_to_stride=-1), _NormalizeBox(),
+                _BatchPad(pad_to_stride=-1), _NormalizeBox(),
                 _PadBox(getattr(self, 'num_max_boxes', 50)), _BboxXYXY2XYWH(),
                 _Gt2YoloTarget(
                     anchor_masks=self.anchor_masks,
@@ -1015,7 +1015,7 @@ class YOLOv3(BaseDetector):
                     num_classes=self.num_classes)
             ]
         else:
-            default_batch_transforms = [_BatchPadding(pad_to_stride=-1)]
+            default_batch_transforms = [_BatchPad(pad_to_stride=-1)]
         if mode == 'eval' and self.metric == 'voc':
             collate_batch = False
         else:
@@ -1362,11 +1362,11 @@ class FasterRCNN(BaseDetector):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
-                _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+                _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
             ]
         else:
             default_batch_transforms = [
-                _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+                _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
             ]
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
@@ -1408,7 +1408,7 @@ class FasterRCNN(BaseDetector):
                 self.test_transforms.transforms[resize_op_idx] = Resize(
                     target_size=image_shape, keep_ratio=True, interp='CUBIC')
             self.test_transforms.transforms.append(
-                Padding(im_padding_value=[0., 0., 0.]))
+                Pad(im_padding_value=[0., 0., 0.]))
 
     def _get_test_inputs(self, image_shape):
         if image_shape is not None:
@@ -1418,7 +1418,7 @@ class FasterRCNN(BaseDetector):
             image_shape = [None, 3, -1, -1]
             if self.with_fpn:
                 self.test_transforms.transforms.append(
-                    Padding(im_padding_value=[0., 0., 0.]))
+                    Pad(im_padding_value=[0., 0., 0.]))
 
         self.fixed_input_shape = image_shape
         return self._define_input_spec(image_shape)
@@ -2187,11 +2187,11 @@ class MaskRCNN(BaseDetector):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
-                _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+                _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
             ]
         else:
             default_batch_transforms = [
-                _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+                _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
             ]
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
@@ -2233,7 +2233,7 @@ class MaskRCNN(BaseDetector):
                 self.test_transforms.transforms[resize_op_idx] = Resize(
                     target_size=image_shape, keep_ratio=True, interp='CUBIC')
             self.test_transforms.transforms.append(
-                Padding(im_padding_value=[0., 0., 0.]))
+                Pad(im_padding_value=[0., 0., 0.]))
 
     def _get_test_inputs(self, image_shape):
         if image_shape is not None:
@@ -2243,7 +2243,7 @@ class MaskRCNN(BaseDetector):
             image_shape = [None, 3, -1, -1]
             if self.with_fpn:
                 self.test_transforms.transforms.append(
-                    Padding(im_padding_value=[0., 0., 0.]))
+                    Pad(im_padding_value=[0., 0., 0.]))
         self.fixed_input_shape = image_shape
 
         return self._define_input_spec(image_shape)

+ 22 - 11
paddlers/tasks/segmenter.py

@@ -32,7 +32,7 @@ import paddlers.utils.logging as logging
 from .base import BaseModel
 from .utils import seg_metrics as metrics
 from paddlers.utils.checkpoint import seg_pretrain_weights_dict
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import DecodeImg, Resize
 
 __all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2", "FarSeg"]
 
@@ -519,7 +519,12 @@ class BaseSegmenter(BaseModel):
             }
         return prediction
 
-    def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None):
+    def slider_predict(self,
+                       img_file,
+                       save_dir,
+                       block_size,
+                       overlap=36,
+                       transforms=None):
         """
         Do inference.
         Args:
@@ -539,19 +544,21 @@ class BaseSegmenter(BaseModel):
             from osgeo import gdal
         except:
             import gdal
-        
+
         if isinstance(block_size, int):
             block_size = (block_size, block_size)
         elif isinstance(block_size, (tuple, list)) and len(block_size) == 2:
             block_size = tuple(block_size)
         else:
-            raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.")
+            raise ValueError(
+                "`block_size` must be a tuple/list of length 2 or an integer.")
         if isinstance(overlap, int):
             overlap = (overlap, overlap)
         elif isinstance(overlap, (tuple, list)) and len(overlap) == 2:
             overlap = tuple(overlap)
         else:
-            raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.")
+            raise ValueError(
+                "`overlap` must be a tuple/list of length 2 or an integer.")
 
         src_data = gdal.Open(img_file)
         width = src_data.RasterXSize
@@ -559,7 +566,8 @@ class BaseSegmenter(BaseModel):
         bands = src_data.RasterCount
 
         driver = gdal.GetDriverByName("GTiff")
-        file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[0] + ".tif"
+        file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[
+            0] + ".tif"
         if not osp.exists(save_dir):
             os.makedirs(save_dir)
         save_file = osp.join(save_dir, file_name)
@@ -577,13 +585,16 @@ class BaseSegmenter(BaseModel):
                     xsize = int(width - xoff)
                 if yoff + ysize > height:
                     ysize = int(height - yoff)
-                im = src_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+                im = src_data.ReadAsArray(int(xoff), int(yoff), xsize,
+                                          ysize).transpose((1, 2, 0))
                 # fill
                 h, w = im.shape[:2]
-                im_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im.dtype)
+                im_fill = np.zeros(
+                    (block_size[1], block_size[0], bands), dtype=im.dtype)
                 im_fill[:h, :w, :] = im
                 # predict
-                pred = self.predict(im_fill, transforms)["label_map"].astype("uint8")
+                pred = self.predict(im_fill,
+                                    transforms)["label_map"].astype("uint8")
                 # overlap
                 rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize)
                 mask = (rd_block == pred[:h, :w]) | (rd_block == 255)
@@ -602,7 +613,7 @@ class BaseSegmenter(BaseModel):
         for im in images:
             sample = {'image': im}
             if isinstance(sample['image'], str):
-                sample = ImgDecoder(to_rgb=False)(sample)
+                sample = DecodeImg(to_rgb=False)(sample)
             ori_shape = sample['image'].shape[:2]
             im = transforms(sample)[0]
             batch_im.append(im)
@@ -639,7 +650,7 @@ class BaseSegmenter(BaseModel):
                     scale = float(op.long_size) / float(im_long_size)
                     h = int(round(h * scale))
                     w = int(round(w * scale))
-                elif op.__class__.__name__ == 'Padding':
+                elif op.__class__.__name__ == 'Pad':
                     if op.target_size:
                         target_h, target_w = op.target_size
                     else:

+ 1 - 0
paddlers/tools/yolo_cluster.py

@@ -28,6 +28,7 @@ class BaseAnchorCluster(object):
     def __init__(self, num_anchors, cache, cache_path):
         """
         Base Anchor Cluster
+        
         Args:
             num_anchors (int): number of clusters
             cache (bool): whether using cache

+ 1 - 1
paddlers/transforms/__init__.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from .operators import *
-from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPadding
+from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPad
 from paddlers import transforms as T
 
 

+ 2 - 2
paddlers/transforms/batch_operators.py

@@ -149,9 +149,9 @@ class BatchRandomResizeByShort(Transform):
         return samples
 
 
-class _BatchPadding(Transform):
+class _BatchPad(Transform):
     def __init__(self, pad_to_stride=0):
-        super(_BatchPadding, self).__init__()
+        super(_BatchPad, self).__init__()
         self.pad_to_stride = pad_to_stride
 
     def __call__(self, samples):

+ 35 - 22
paddlers/transforms/functions.py

@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cv2
 import copy
 
+import cv2
 import numpy as np
 import shapely.ops
 from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
@@ -126,45 +126,52 @@ def img_flip(im, method=0):
     """
     if not len(im.shape) >= 2:
         raise ValueError("Shape of image should 2d, 3d or more")
-    if method==0 or method=='h':
+    if method == 0 or method == 'h':
         return horizontal_flip(im)
-    elif method==1 or method=='v':
+    elif method == 1 or method == 'v':
         return vertical_flip(im)
-    elif method==2 or method=='hv':
+    elif method == 2 or method == 'hv':
         return hv_flip(im)
-    elif method==3 or method=='rt2lb' or method=='dia':
+    elif method == 3 or method == 'rt2lb' or method == 'dia':
         return rt2lb_flip(im)
-    elif method==4 or method=='lt2rb' or method=='adia':
+    elif method == 4 or method == 'lt2rb' or method == 'adia':
         return lt2rb_flip(im)
     else:
         return im
 
+
 def horizontal_flip(im):
     im = im[:, ::-1, ...]
     return im
 
+
 def vertical_flip(im):
     im = im[::-1, :, ...]
     return im
 
+
 def hv_flip(im):
     im = im[::-1, ::-1, ...]
     return im
 
+
 def rt2lb_flip(im):
     axs_list = list(range(len(im.shape)))
     axs_list[:2] = [1, 0]
     im = im.transpose(axs_list)
     return im
 
+
 def lt2rb_flip(im):
     axs_list = list(range(len(im.shape)))
     axs_list[:2] = [1, 0]
     im = im[::-1, ::-1, ...].transpose(axs_list)
     return im
 
+
 # endregion
 
+
 # region rotation
 def img_simple_rotate(im, method=0):
     """
@@ -223,30 +230,35 @@ def img_simple_rotate(im, method=0):
     """
     if not len(im.shape) >= 2:
         raise ValueError("Shape of image should 2d, 3d or more")
-    if method==0 or method==90:
+    if method == 0 or method == 90:
         return rot_90(im)
-    elif method==1 or method==180:
+    elif method == 1 or method == 180:
         return rot_180(im)
-    elif method==2 or method==270:
+    elif method == 2 or method == 270:
         return rot_270(im)
     else:
         return im
 
+
 def rot_90(im):
     axs_list = list(range(len(im.shape)))
     axs_list[:2] = [1, 0]
     im = im[::-1, :, ...].transpose(axs_list)
     return im
 
+
 def rot_180(im):
     im = im[::-1, ::-1, ...]
     return im
 
+
 def rot_270(im):
     axs_list = list(range(len(im.shape)))
     axs_list[:2] = [1, 0]
     im = im[:, ::-1, ...].transpose(axs_list)
     return im
+
+
 # endregion
 
 
@@ -477,15 +489,16 @@ def select_bands(im, band_list=[1, 2, 3]):
     return ima
 
 
-def de_haze(im, gamma=False):
-    """ Priori defogging of dark channel. (Just RGB)
+def dehaze(im, gamma=False):
+    """
+    Single image haze removal using dark channel prior.
 
     Args:
-        im (np.ndarray): The image.
+        im (np.ndarray): Input image.
         gamma (bool, optional): Use gamma correction or not. Defaults to False.
 
     Returns:
-        np.ndarray: The image after defogged.
+        np.ndarray: The image after dehazed.
     """
 
     def _guided_filter(I, p, r, eps):
@@ -501,7 +514,7 @@ def de_haze(im, gamma=False):
         m_b = cv2.boxFilter(b, -1, (r, r))
         return m_a * I + m_b
 
-    def _de_fog(im, r, w, maxatmo_mask, eps):
+    def _dehaze(im, r, w, maxatmo_mask, eps):
         # im is RGB and range[0, 1]
         atmo_mask = np.min(im, 2)
         dark_channel = cv2.erode(atmo_mask, np.ones((15, 15)))
@@ -519,7 +532,7 @@ def de_haze(im, gamma=False):
     if np.max(im) > 1:
         im = im / 255.
     result = np.zeros(im.shape)
-    mask_img, atmo_illum = _de_fog(
+    mask_img, atmo_illum = _dehaze(
         im, r=81, w=0.95, maxatmo_mask=0.80, eps=1e-8)
     for k in range(3):
         result[:, :, k] = (im[:, :, k] - mask_img) / (1 - mask_img / atmo_illum)
@@ -534,11 +547,11 @@ def match_histograms(im, ref):
     Match the cumulative histogram of one image to another.
 
     Args:
-        im (np.ndarray): The input image.
-        ref (np.ndarray): The reference image to match histogram of. `ref` must have the same number of channels as `im`.
+        im (np.ndarray): Input image.
+        ref (np.ndarray): Reference image to match histogram of. `ref` must have the same number of channels as `im`.
 
     Returns:
-        np.ndarray: The transformed input image.
+        np.ndarray: Transformed input image.
 
     Raises:
         ValueError: When the number of channels of `ref` differs from that of im`.
@@ -553,14 +566,14 @@ def match_by_regression(im, ref, pif_loc=None):
     Match the brightness values of two images using a linear regression method.
 
     Args:
-        im (np.ndarray): The input image.
-        ref (np.ndarray): The reference image to match. `ref` must have the same shape as `im`.
-        pif_loc (tuple|None, optional): The spatial locations where pseudo-invariant features (PIFs) are obtained. If 
+        im (np.ndarray): Input image.
+        ref (np.ndarray): Reference image to match. `ref` must have the same shape as `im`.
+        pif_loc (tuple|None, optional): Spatial locations where pseudo-invariant features (PIFs) are obtained. If 
             `pif_loc` is set to None, all pixels in the image will be used as training samples for the regression model. 
             In other cases, `pif_loc` should be a tuple of np.ndarrays. Default: None.
 
     Returns:
-        np.ndarray: The transformed input image.
+        np.ndarray: Transformed input image.
 
     Raises:
         ValueError: When the shape of `ref` differs from that of `im`.

+ 31 - 30
paddlers/transforms/operators.py

@@ -32,12 +32,12 @@ from joblib import load
 import paddlers
 from .functions import normalize, horizontal_flip, permute, vertical_flip, center_crop, is_poly, \
     horizontal_flip_poly, horizontal_flip_rle, vertical_flip_poly, vertical_flip_rle, crop_poly, \
-    crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, de_haze, select_bands, \
+    crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, dehaze, select_bands, \
     to_intensity, to_uint8, img_flip, img_simple_rotate
 
 __all__ = [
     "Compose",
-    "ImgDecoder",
+    "DecodeImg",
     "Resize",
     "RandomResize",
     "ResizeByShort",
@@ -50,19 +50,19 @@ __all__ = [
     "RandomCrop",
     "RandomScaleAspect",
     "RandomExpand",
-    "Padding",
+    "Pad",
     "MixupImage",
     "RandomDistort",
     "RandomBlur",
     "RandomSwap",
-    "Defogging",
-    "DimReducing",
-    "BandSelecting",
+    "Dehaze",
+    "ReduceDim",
+    "SelectBand",
     "ArrangeSegmenter",
     "ArrangeChangeDetector",
     "ArrangeClassifier",
     "ArrangeDetector",
-    "RandomFlipOrRotation",
+    "RandomFlipOrRotate",
 ]
 
 interp_dict = {
@@ -119,7 +119,7 @@ class Transform(object):
         return sample
 
 
-class ImgDecoder(Transform):
+class DecodeImg(Transform):
     """
     Decode image(s) in input.
     Args:
@@ -127,7 +127,7 @@ class ImgDecoder(Transform):
     """
 
     def __init__(self, to_rgb=True, to_uint8=True):
-        super(ImgDecoder, self).__init__()
+        super(DecodeImg, self).__init__()
         self.to_rgb = to_rgb
         self.to_uint8 = to_uint8
 
@@ -254,7 +254,7 @@ class Compose(Transform):
                 'Length of transforms must not be less than 1, but received is {}'
                 .format(len(transforms)))
         self.transforms = transforms
-        self.decode_image = ImgDecoder(to_uint8=to_uint8)
+        self.decode_image = DecodeImg(to_uint8=to_uint8)
         self.arrange_outputs = None
         self.apply_im_only = False
 
@@ -544,7 +544,7 @@ class ResizeByLong(Transform):
         return sample
 
 
-class RandomFlipOrRotation(Transform):
+class RandomFlipOrRotate(Transform):
     """
     Flip or Rotate an image in different ways with a certain probability.
 
@@ -561,7 +561,7 @@ class RandomFlipOrRotation(Transform):
 
         # 定义数据增强
         train_transforms = T.Compose([
-            T.RandomFlipOrRotation(
+            T.RandomFlipOrRotate(
                probs  = [0.3, 0.2],            # 进行flip增强的概率是0.3,进行rotate增强的概率是0.2,不变的概率是0.5
                probsf = [0.3, 0.25, 0, 0, 0],  # flip增强时,使用水平flip、垂直flip的概率分别是0.3、0.25,水平且垂直flip、对角线flip、反对角线flip概率均为0,不变的概率是0.45
                probsr = [0, 0.65, 0]),         # rotate增强时,顺时针旋转90度的概率是0,顺时针旋转180度的概率是0.65,顺时针旋转270度的概率是0,不变的概率是0.35
@@ -574,7 +574,7 @@ class RandomFlipOrRotation(Transform):
                  probs=[0.35, 0.25],
                  probsf=[0.3, 0.3, 0.2, 0.1, 0.1],
                  probsr=[0.25, 0.5, 0.25]):
-        super(RandomFlipOrRotation, self).__init__()
+        super(RandomFlipOrRotate, self).__init__()
         # Change various probabilities into probability intervals, to judge in which mode to flip or rotate
         self.probs = [probs[0], probs[0] + probs[1]]
         self.probsf = self.get_probs_range(probsf)
@@ -1092,7 +1092,7 @@ class RandomExpand(Transform):
         label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
 
     See Also:
-        paddlers.transforms.Padding
+        paddlers.transforms.Pad
     """
 
     def __init__(self,
@@ -1120,7 +1120,7 @@ class RandomExpand(Transform):
                 x = np.random.randint(0, w - im_w)
                 target_size = (h, w)
                 offsets = (x, y)
-                sample = Padding(
+                sample = Pad(
                     target_size=target_size,
                     pad_mode=-1,
                     offsets=offsets,
@@ -1129,7 +1129,7 @@ class RandomExpand(Transform):
         return sample
 
 
-class Padding(Transform):
+class Pad(Transform):
     def __init__(self,
                  target_size=None,
                  pad_mode=0,
@@ -1148,7 +1148,7 @@ class Padding(Transform):
             label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
             size_divisor(int): Image width and height after padding is a multiple of coarsest_stride.
         """
-        super(Padding, self).__init__()
+        super(Pad, self).__init__()
         if isinstance(target_size, (list, tuple)):
             if len(target_size) != 2:
                 raise ValueError(
@@ -1525,20 +1525,20 @@ class RandomBlur(Transform):
         return sample
 
 
-class Defogging(Transform):
+class Dehaze(Transform):
     """
-    Defog input image(s).
+    Dehaze input image(s).
 
     Args: 
         gamma (bool, optional): Use gamma correction or not. Defaults to False.
     """
 
     def __init__(self, gamma=False):
-        super(Defogging, self).__init__()
+        super(Dehaze, self).__init__()
         self.gamma = gamma
 
     def apply_im(self, image):
-        image = de_haze(image, self.gamma)
+        image = dehaze(image, self.gamma)
         return image
 
     def apply(self, sample):
@@ -1548,19 +1548,20 @@ class Defogging(Transform):
         return sample
 
 
-class DimReducing(Transform):
+class ReduceDim(Transform):
     """
-    Use PCA to reduce input image(s) dimension.
+    Use PCA to reduce the dimension of input image(s).
 
     Args: 
-        joblib_path (str): Path of *.joblib about PCA.
+        joblib_path (str): Path of *.joblib file of PCA.
     """
 
     def __init__(self, joblib_path):
-        super(DimReducing, self).__init__()
+        super(ReduceDim, self).__init__()
         ext = joblib_path.split(".")[-1]
         if ext != "joblib":
-            raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format(ext))
+            raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format(
+                ext))
         self.pca = load(joblib_path)
 
     def apply_im(self, image):
@@ -1577,16 +1578,16 @@ class DimReducing(Transform):
         return sample
 
 
-class BandSelecting(Transform):
+class SelectBand(Transform):
     """
-    Select the band of the input image(s).
+    Select a set of bands of input image(s).
 
     Args: 
-        band_list (list, optional): Bands of selected (Start with 1). Defaults to [1, 2, 3].
+        band_list (list, optional): Bands to select (the band index starts with 1). Defaults to [1, 2, 3].
     """
 
     def __init__(self, band_list=[1, 2, 3]):
-        super(BandSelecting, self).__init__()
+        super(SelectBand, self).__init__()
         self.band_list = band_list
 
     def apply_im(self, image):

+ 2 - 2
tutorials/train/classification/condensenetv2_b_rs_mul.py

@@ -3,7 +3,7 @@ from paddlers import transforms as T
 
 # 定义训练和验证时的transforms
 train_transforms = T.Compose([
-    T.BandSelecting([5, 10, 15, 20, 25]),  # for tet
+    T.SelectBand([5, 10, 15, 20, 25]),  # for test
     T.Resize(target_size=224),
     T.RandomHorizontalFlip(),
     T.Normalize(
@@ -11,7 +11,7 @@ train_transforms = T.Compose([
 ])
 
 eval_transforms = T.Compose([
-    T.BandSelecting([5, 10, 15, 20, 25]),
+    T.SelectBand([5, 10, 15, 20, 25]),
     T.Resize(target_size=224),
     T.Normalize(
         mean=[0.5, 0.5, 0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5, 0.5, 0.5]),