@@ -31,7 +31,7 @@ from .functions import normalize, horizontal_flip, permute, vertical_flip, cente
     crop_rle, expand_poly, expand_rle, resize_poly, resize_rle
 
 __all__ = [
-    "Compose", "Decode", "Resize", "RandomResize", "ResizeByShort",
+    "Compose", "ImgDecoder", "Resize", "RandomResize", "ResizeByShort",
     "RandomResizeByShort", "ResizeByLong", "RandomHorizontalFlip",
     "RandomVerticalFlip", "Normalize", "CenterCrop", "RandomCrop",
     "RandomScaleAspect", "RandomExpand", "Padding", "MixupImage",
@@ -90,66 +90,15 @@ class Transform(object):
         return sample
 
 
-class Compose(Transform):
-    """
-    Apply a series of data augmentation to the input.
-    All input images are in Height-Width-Channel ([H, W, C]) format.
-
-    Args:
-        transforms (List[paddlers.transforms.Transform]): List of data preprocess or augmentations.
-    Raises:
-        TypeError: Invalid type of transforms.
-        ValueError: Invalid length of transforms.
-    """
-
-    def __init__(self, transforms):
-        super(Compose, self).__init__()
-        if not isinstance(transforms, list):
-            raise TypeError(
-                'Type of transforms is invalid. Must be List, but received is {}'
-                .format(type(transforms)))
-        if len(transforms) < 1:
-            raise ValueError(
-                'Length of transforms must not be less than 1, but received is {}'
-                .format(len(transforms)))
-        self.transforms = transforms
-        self.decode_image = Decode()
-        self.arrange_outputs = None
-        self.apply_im_only = False
-
-    def __call__(self, sample):
-        if self.apply_im_only and 'mask' in sample:
-            mask_backup = copy.deepcopy(sample['mask'])
-            del sample['mask']
-
-        sample = self.decode_image(sample)
-
-        for op in self.transforms:
-            # skip batch transforms amd mixup
-            if isinstance(op, (paddlers.transforms.BatchRandomResize,
-                               paddlers.transforms.BatchRandomResizeByShort,
-                               MixupImage)):
-                continue
-            sample = op(sample)
-
-        if self.arrange_outputs is not None:
-            if self.apply_im_only:
-                sample['mask'] = mask_backup
-            sample = self.arrange_outputs(sample)
-
-        return sample
-
-
-class Decode(Transform):
+class ImgDecoder(Transform):
     """
     Decode image(s) in input.
-
     Args:
         to_rgb (bool, optional): If True, convert input images from BGR format to RGB format. Defaults to True.
     """
 
     def __init__(self, to_rgb=True):
-        super(Decode, self).__init__()
+        super(ImgDecoder, self).__init__()
         self.to_rgb = to_rgb
 
     def read_img(self, img_path, input_channel=3):
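
Note that renaming `Decode` to `ImgDecoder` (here and in `__all__` above) is a breaking change for downstream code that imports the old name. A minimal migration sketch, assuming the transform remains importable from `paddlers.transforms` as the `__all__` edit suggests; the GeoTIFF path is a placeholder:

```python
import paddlers.transforms as T

# Before this change:
#   decoder = T.Decode(to_rgb=True)
decoder = T.ImgDecoder(to_rgb=True)

# Transform instances are callable on a sample dict; the decoder reads the
# path stored under 'image' and replaces it with an H-W-C ndarray, also
# recording 'im_shape' and 'scale_factor' (see apply() below).
sample = decoder({'image': 'aerial.tif'})  # placeholder file name
print(sample['image'].shape)
```
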
@@ -172,7 +121,7 @@ class Decode(Transform):
                 raise Exception('Can not open', img_path)
             im_data = dataset.ReadAsArray()
             if im_data.ndim == 3:
-                im_data.transpose((1, 2, 0))
+                im_data = im_data.transpose((1, 2, 0))
             return im_data
         elif img_format in ['jpeg', 'bmp', 'png', 'jpg']:
             if input_channel == 3:
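
The one-line change above fixes a silent no-op: `ndarray.transpose` returns a rearranged view and leaves the original array untouched, so the old statement discarded its result and GDAL's band-first array escaped unconverted. A standalone demonstration:

```python
import numpy as np

im_data = np.zeros((4, 100, 80))        # GDAL-style (bands, height, width)

im_data.transpose((1, 2, 0))            # old code: result thrown away
print(im_data.shape)                    # still (4, 100, 80)

im_data = im_data.transpose((1, 2, 0))  # fixed: rebind to the (H, W, C) view
print(im_data.shape)                    # (100, 80, 4)
```
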
@@ -196,7 +145,7 @@ class Decode(Transform):
         else:
             image = im_path
 
-        if self.to_rgb:
+        if self.to_rgb and image.shape[-1] == 3:
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
         return image
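
The added channel check matters because `COLOR_BGR2RGB` assumes a 3-channel input; without the guard, `to_rgb=True` (the default) would raise on single-band or multispectral rasters. A quick illustration:

```python
import cv2
import numpy as np

rgb_like = np.zeros((64, 64, 3), dtype=np.uint8)
multispectral = np.zeros((64, 64, 10), dtype=np.uint8)

cv2.cvtColor(rgb_like, cv2.COLOR_BGR2RGB)           # fine: 3 channels

try:
    cv2.cvtColor(multispectral, cv2.COLOR_BGR2RGB)  # what the guard prevents
except cv2.error:
    print("cvtColor rejects non-3-channel input")
```
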
@@ -214,13 +163,10 @@ class Decode(Transform):
 
     def apply(self, sample):
         """
-
         Args:
             sample (dict): Input sample, containing 'image' at least.
-
         Returns:
             dict: Decoded sample.
-
         """
         if 'image' in sample:
             sample['image'] = self.apply_im(sample['image'])
@@ -234,12 +180,63 @@ class Decode(Transform):
             if im_height != se_height or im_width != se_width:
                 raise Exception(
                     "The height or width of the im is not same as the mask")
+
         sample['im_shape'] = np.array(
             sample['image'].shape[:2], dtype=np.float32)
         sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
         return sample
 
 
+class Compose(Transform):
+    """
+    Apply a series of data augmentation to the input.
+    All input images are in Height-Width-Channel ([H, W, C]) format.
+
+    Args:
+        transforms (List[paddlers.transforms.Transform]): List of data preprocess or augmentations.
+    Raises:
+        TypeError: Invalid type of transforms.
+        ValueError: Invalid length of transforms.
+    """
+
+    def __init__(self, transforms):
+        super(Compose, self).__init__()
+        if not isinstance(transforms, list):
+            raise TypeError(
+                'Type of transforms is invalid. Must be List, but received is {}'
+                .format(type(transforms)))
+        if len(transforms) < 1:
+            raise ValueError(
+                'Length of transforms must not be less than 1, but received is {}'
+                .format(len(transforms)))
+        self.transforms = transforms
+        self.decode_image = ImgDecoder()
+        self.arrange_outputs = None
+        self.apply_im_only = False
+
+    def __call__(self, sample):
+        if self.apply_im_only and 'mask' in sample:
+            mask_backup = copy.deepcopy(sample['mask'])
+            del sample['mask']
+
+        sample = self.decode_image(sample)
+
+        for op in self.transforms:
+            # skip batch transforms and mixup
+            if isinstance(op, (paddlers.transforms.BatchRandomResize,
+                               paddlers.transforms.BatchRandomResizeByShort,
+                               MixupImage)):
+                continue
+            sample = op(sample)
+
+        if self.arrange_outputs is not None:
+            if self.apply_im_only:
+                sample['mask'] = mask_backup
+            sample = self.arrange_outputs(sample)
+
+        return sample
+
+
 class Resize(Transform):
     """
     Resize input.
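
`Compose` moves below the decoder so that `ImgDecoder`, which its constructor instantiates, is defined before its point of use (Python resolves the name at call time, so the move is organizational rather than functional). Usage is unchanged; a minimal sketch of driving the pipeline, where the constructor arguments and file name are illustrative rather than taken from this diff:

```python
import paddlers.transforms as T

pipeline = T.Compose([
    T.Resize(target_size=512),   # illustrative arguments
    T.RandomHorizontalFlip(),
    T.Normalize(),
])

# Compose runs its internal ImgDecoder() first, so the sample can enter
# as a file path rather than a decoded ndarray.
sample = pipeline({'image': 'aerial.tif'})  # placeholder file name
```
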
@@ -618,10 +615,16 @@ class Normalize(Transform):
     def __init__(self,
                  mean=[0.485, 0.456, 0.406],
                  std=[0.229, 0.224, 0.225],
-                 min_val=[0, 0, 0],
-                 max_val=[255., 255., 255.],
+                 min_val=None,
+                 max_val=None,
                  is_scale=True):
         super(Normalize, self).__init__()
+        channel = len(mean)
+        if min_val is None:
+            min_val = [0] * channel
+        if max_val is None:
+            max_val = [255.] * channel
+
         from functools import reduce
         if reduce(lambda x, y: x * y, std) == 0:
             raise ValueError(
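
Replacing the list defaults with `None` sentinels sidesteps Python's shared-mutable-default pitfall and, by sizing `min_val`/`max_val` from `mean`, lets `Normalize` cover images with other than three bands. A self-contained sketch of both points (the `scale` helper is hypothetical):

```python
# Pitfall: a list default is created once and shared by every call, so an
# in-place mutation leaks into all later calls.
def scale(vals, max_val=[255., 255., 255.]):
    max_val += [1.]       # mutates the shared default list
    return len(max_val)

print(scale([0]))         # 4
print(scale([0]))         # 5 -- the default kept growing

# With None sentinels, the bounds track the channel count of `mean`,
# e.g. a 4-band image just needs 4-element mean/std:
mean, std = [0.5] * 4, [0.25] * 4
channel = len(mean)
min_val, max_val = [0] * channel, [255.] * channel
print(min_val, max_val)   # [0, 0, 0, 0] [255.0, 255.0, 255.0, 255.0]
```
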
@@ -633,7 +636,6 @@ class Normalize(Transform):
                     '(max_val - min_val) should not have 0, but received is {}'.
                     format((np.asarray(max_val) - np.asarray(min_val)).tolist(
                     )))
-
         self.mean = mean
         self.std = std
         self.min_val = min_val