4 年之前 · c02d457501
--- a/paddlers/tasks/__init__.py
+++ b/paddlers/tasks/__init__.py
@@ -12,7 +12,7 @@
 
				 # See the License for the specific language governing permissions and
			
 
				 # limitations under the License.
			
 
				 
			
 
				-from . import det
			
 
				+from .object_detector import *
			
 
				 from .segmenter import *
			
 
				 from .changedetector import *
			
 
				 from .classifier import *
			
--- a/paddlers/tasks/det/__init__.py
+++ b/paddlers/tasks/det/__init__.py
@@ -1,15 +0,0 @@
 
				-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				-# limitations under the License.
			
 
				-
			
 
				-from .detector import *
			
--- a/paddlers/tasks/object_detector.py
+++ b/paddlers/tasks/object_detector.py
@@ -29,8 +29,8 @@ from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH
 
				 from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \
			
 
				     _BatchPadding, _Gt2YoloTarget
			
 
				 from paddlers.transforms import arrange_transforms
			
 
				-from ..base import BaseModel
			
 
				-from ..utils.det_metrics import VOCMetric, COCOMetric
			
 
				+from .base import BaseModel
			
 
				+from .utils.det_metrics import VOCMetric, COCOMetric
			
 
				 from paddlers.models.ppdet.optimizer import ModelEMA
			
 
				 from paddlers.utils.checkpoint import det_pretrain_weights_dict
			
 
				 
			
--- a/paddlers/transforms/operators.py
+++ b/paddlers/transforms/operators.py
@@ -120,7 +120,9 @@ class ImgDecoder(Transform):
 
				             if dataset == None:
			
 
				                 raise Exception('Can not open', img_path)
			
 
				             im_data = dataset.ReadAsArray()
			
 
				-            if im_data.ndim == 3:
			
 
				+            if im_data.ndim == 2:
			
 
				+                im_data = im_data[:, :, np.newaxis]
			
 
				+            elif im_data.ndim == 3:
			
 
				                 im_data = im_data.transpose((1, 2, 0))
			
 
				             return im_data
			
 
				         elif img_format in ['jpeg', 'bmp', 'png', 'jpg']:
			
@@ -277,7 +279,10 @@ class Resize(Transform):
 
				         self.keep_ratio = keep_ratio
			
 
				 
			
 
				     def apply_im(self, image, interp, target_size):
			
 
				+        flag = image.shape[2] == 1
			
 
				         image = cv2.resize(image, target_size, interpolation=interp)
			
 
				+        if flag:
			
 
				+            image = image[:, :, np.newaxis]
			
 
				         return image
			
 
				 
			
 
				     def apply_mask(self, mask, target_size):
			
@@ -346,7 +351,6 @@ class Resize(Transform):
 
				             sample['scale_factor'] = np.asarray(
			
 
				                 [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
			
 
				                 dtype=np.float32)
			
 
				-
			
 
				         return sample
			
 
				 
			
 
				 
			
@@ -1001,8 +1005,8 @@ class Padding(Transform):
 
				 
			
 
				     def apply_im(self, image, offsets, target_size):
			
 
				         x, y = offsets
			
 
				-        im_h, im_w, channel = image.shape[:3]
			
 
				         h, w = target_size
			
 
				+        im_h, im_w, channel = image.shape[:3]
			
 
				         canvas = np.ones((h, w, channel), dtype=np.float32)
			
 
				         canvas *= np.array(self.im_padding_value, dtype=np.float32)
			
 
				         canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
			
@@ -1204,7 +1208,6 @@ class RandomDistort(Transform):
 
				         if np.random.uniform(0., 1.) < self.hue_prob:
			
 
				             return image
			
 
				 
			
 
				-        image = image.astype(np.float32)
			
 
				         # it works, but result differ from HSV version
			
 
				         delta = np.random.uniform(low, high)
			
 
				         u = np.cos(delta * np.pi)
			
@@ -1215,22 +1218,45 @@ class RandomDistort(Transform):
 
				         ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
			
 
				                           [1.0, -1.107, 1.705]])
			
 
				         t = np.dot(np.dot(ityiq, bt), tyiq).T
			
 
				-        image = np.dot(image, t)
			
 
				-        return image
			
 
				+
			
 
				+        res_list = []
			
 
				+        channel = image.shape[2]
			
 
				+        for i in range(channel // 3):
			
 
				+            sub_img = image[:, :, 3*i : 3*(i+1)]
			
 
				+            sub_img = sub_img.astype(np.float32)
			
 
				+            sub_img = np.dot(image, t)
			
 
				+            res_list.append(sub_img)
			
 
				+
			
 
				+        if channel % 3 != 0:
			
 
				+            i = channel % 3
			
 
				+            res_list.append(image[:, :, -i:])
			
 
				+
			
 
				+        return np.concatenate(res_list, axis=2)
			
 
				 
			
 
				     def apply_saturation(self, image):
			
 
				         low, high = self.saturation_range
			
 
				+        delta = np.random.uniform(low, high)
			
 
				         if np.random.uniform(0., 1.) < self.saturation_prob:
			
 
				             return image
			
 
				-        delta = np.random.uniform(low, high)
			
 
				-        image = image.astype(np.float32)
			
 
				-        # it works, but result differ from HSV version
			
 
				-        gray = image * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
			
 
				-        gray = gray.sum(axis=2, keepdims=True)
			
 
				-        gray *= (1.0 - delta)
			
 
				-        image *= delta
			
 
				-        image += gray
			
 
				-        return image
			
 
				+
			
 
				+        res_list = []
			
 
				+        channel = image.shape[2]
			
 
				+        for i in range(channel // 3):
			
 
				+            sub_img = image[:, :, 3*i : 3*(i+1)]
			
 
				+            sub_img = sub_img.astype(np.float32)
			
 
				+            # it works, but result differ from HSV version
			
 
				+            gray = sub_img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
			
 
				+            gray = gray.sum(axis=2, keepdims=True)
			
 
				+            gray *= (1.0 - delta)
			
 
				+            sub_img *= delta
			
 
				+            sub_img += gray
			
 
				+            res_list.append(sub_img)
			
 
				+
			
 
				+        if channel % 3 != 0:
			
 
				+            i = channel % 3
			
 
				+            res_list.append(image[:, :, -i:])
			
 
				+
			
 
				+        return np.concatenate(res_list, axis=2)
			
 
				 
			
 
				     def apply_contrast(self, image):
			
 
				         low, high = self.contrast_range
			
--- a/tutorials/train/detection/faster_rcnn_sar_ship.py
+++ b/tutorials/train/detection/faster_rcnn_sar_ship.py
@@ -11,7 +11,7 @@ if not os.path.exists(data_dir):
 
				 # define transforms
			
 
				 train_transforms = T.Compose([
			
 
				     T.RandomDistort(),
			
 
				-    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]),
			
 
				+    T.RandomExpand(),
			
 
				     T.RandomCrop(),
			
 
				     T.RandomHorizontalFlip(),
			
 
				     T.BatchRandomResize(
			
@@ -21,9 +21,9 @@ train_transforms = T.Compose([
 
				 ])
			
 
				 
			
 
				 eval_transforms = T.Compose([
			
 
				-    T.Resize(
			
 
				-        target_size=608, interp='CUBIC'), T.Normalize(
			
 
				-            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
			
 
				+    T.Resize(target_size=608, interp='CUBIC'),
			
 
				+    T.Normalize(
			
 
				+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
			
 
				 ])
			
 
				 
			
 
				 # define dataset
			
@@ -46,7 +46,7 @@ eval_dataset = pdrs.datasets.VOCDetection(
 
				 
			
 
				 # define models
			
 
				 num_classes = len(train_dataset.labels)
			
 
				-model = pdrs.tasks.det.FasterRCNN(num_classes=num_classes)
			
 
				+model = pdrs.tasks.FasterRCNN(num_classes=num_classes)
			
 
				 
			
 
				 # train
			
 
				 model.train(