|
|
@@ -41,7 +41,6 @@ import threading
|
|
|
MUTEX = threading.Lock()
|
|
|
|
|
|
from paddlers.models.ppdet.core.workspace import serializable
|
|
|
-from paddlers.models.ppdet.modeling import bbox_utils
|
|
|
from ..reader import Compose
|
|
|
|
|
|
from .op_helper import (satisfy_sample_constraint, filter_and_process,
|
|
|
@@ -123,12 +122,15 @@ class Decode(BaseOperator):
|
|
|
sample['image'] = f.read()
|
|
|
sample.pop('im_file')
|
|
|
|
|
|
- im = sample['image']
|
|
|
- data = np.frombuffer(im, dtype='uint8')
|
|
|
- im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
|
|
|
- if 'keep_ori_im' in sample and sample['keep_ori_im']:
|
|
|
- sample['ori_image'] = im
|
|
|
- im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
|
|
+ try:
|
|
|
+ im = sample['image']
|
|
|
+ data = np.frombuffer(im, dtype='uint8')
|
|
|
+ im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
|
|
|
+ if 'keep_ori_im' in sample and sample['keep_ori_im']:
|
|
|
+ sample['ori_image'] = im
|
|
|
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
|
|
+ except:
|
|
|
+ im = sample['image']
|
|
|
|
|
|
sample['image'] = im
|
|
|
if 'h' not in sample:
|
|
|
@@ -357,19 +359,26 @@ class RandomErasingImage(BaseOperator):
|
|
|
|
|
|
@register_op
|
|
|
class NormalizeImage(BaseOperator):
|
|
|
- def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
|
|
|
- is_scale=True):
|
|
|
+ def __init__(self,
|
|
|
+ mean=[0.485, 0.456, 0.406],
|
|
|
+ std=[0.229, 0.224, 0.225],
|
|
|
+ is_scale=True,
|
|
|
+ norm_type='mean_std'):
|
|
|
"""
|
|
|
Args:
|
|
|
mean (list): the pixel mean
|
|
|
std (list): the pixel variance
|
|
|
+ is_scale (bool): scale the pixel to [0,1]
|
|
|
+ norm_type (str): type in ['mean_std', 'none']
|
|
|
"""
|
|
|
super(NormalizeImage, self).__init__()
|
|
|
self.mean = mean
|
|
|
self.std = std
|
|
|
self.is_scale = is_scale
|
|
|
+ self.norm_type = norm_type
|
|
|
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
|
|
|
- isinstance(self.is_scale, bool)):
|
|
|
+ isinstance(self.is_scale, bool) and
|
|
|
+ self.norm_type in ['mean_std', 'none']):
|
|
|
raise TypeError("{}: input type is invalid.".format(self))
|
|
|
from functools import reduce
|
|
|
if reduce(lambda x, y: x * y, self.std) == 0:
|
|
|
@@ -378,20 +387,20 @@ class NormalizeImage(BaseOperator):
|
|
|
def apply(self, sample, context=None):
|
|
|
"""Normalize the image.
|
|
|
Operators:
|
|
|
- 1.(optional) Scale the image to [0,1]
|
|
|
- 2. Each pixel minus mean and is divided by std
|
|
|
+ 1.(optional) Scale the pixel to [0,1]
|
|
|
+ 2.(optional) Each pixel minus mean and is divided by std
|
|
|
"""
|
|
|
im = sample['image']
|
|
|
im = im.astype(np.float32, copy=False)
|
|
|
- mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
|
|
|
- std = np.array(self.std)[np.newaxis, np.newaxis, :]
|
|
|
-
|
|
|
if self.is_scale:
|
|
|
- im = im / 255.0
|
|
|
-
|
|
|
- im -= mean
|
|
|
- im /= std
|
|
|
-
|
|
|
+ scale = 1.0 / 255.0
|
|
|
+ im *= scale
|
|
|
+
|
|
|
+ if self.norm_type == 'mean_std':
|
|
|
+ mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
|
|
|
+ std = np.array(self.std)[np.newaxis, np.newaxis, :]
|
|
|
+ im -= mean
|
|
|
+ im /= std
|
|
|
sample['image'] = im
|
|
|
return sample
|
|
|
|
|
|
@@ -448,6 +457,10 @@ class GridMask(BaseOperator):
|
|
|
@register_op
|
|
|
class RandomDistort(BaseOperator):
|
|
|
"""Random color distortion.
|
|
|
+ Note:
|
|
|
+ The 'probability' in [lower, upper, probability] is the probability of not using this transformation,
|
|
|
+ not the probability of using this transformation. This only applies to this operator (RandomDistort);
|
|
|
+ 'probability' in other BaseOperator means the probability of using that transformation.
|
|
|
Args:
|
|
|
hue (list): hue settings. in [lower, upper, probability] format.
|
|
|
saturation (list): saturation settings. in [lower, upper, probability] format.
|
|
|
@@ -657,18 +670,6 @@ class RandomFlip(BaseOperator):
|
|
|
bbox[:, 2] = width - oldx1
|
|
|
return bbox
|
|
|
|
|
|
- def apply_rbox(self, bbox, width):
|
|
|
- oldx1 = bbox[:, 0].copy()
|
|
|
- oldx2 = bbox[:, 2].copy()
|
|
|
- oldx3 = bbox[:, 4].copy()
|
|
|
- oldx4 = bbox[:, 6].copy()
|
|
|
- bbox[:, 0] = width - oldx1
|
|
|
- bbox[:, 2] = width - oldx2
|
|
|
- bbox[:, 4] = width - oldx3
|
|
|
- bbox[:, 6] = width - oldx4
|
|
|
- bbox = [bbox_utils.get_best_begin_point_single(e) for e in bbox]
|
|
|
- return bbox
|
|
|
-
|
|
|
def apply(self, sample, context=None):
|
|
|
"""Filp the image and bounding box.
|
|
|
Operators:
|
|
|
@@ -700,10 +701,6 @@ class RandomFlip(BaseOperator):
|
|
|
if 'gt_segm' in sample and sample['gt_segm'].any():
|
|
|
sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
|
|
|
|
|
|
- if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
|
|
|
- sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
|
|
|
- width)
|
|
|
-
|
|
|
sample['flipped'] = True
|
|
|
sample['image'] = im
|
|
|
return sample
|
|
|
@@ -713,7 +710,7 @@ class RandomFlip(BaseOperator):
|
|
|
class Resize(BaseOperator):
|
|
|
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
|
|
|
"""
|
|
|
- Resize image to target size. if keep_ratio is True,
|
|
|
+ Resize image to target size. if keep_ratio is True,
|
|
|
resize the image's long side to the maximum of target_size
|
|
|
if keep_ratio is False, resize the image to target size(h, w)
|
|
|
Args:
|
|
|
@@ -824,7 +821,7 @@ class Resize(BaseOperator):
|
|
|
im_scale_x = resize_w / im_shape[1]
|
|
|
|
|
|
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
|
|
|
- sample['image'] = im
|
|
|
+ sample['image'] = im.astype(np.float32)
|
|
|
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
|
|
|
if 'scale_factor' in sample:
|
|
|
scale_factor = sample['scale_factor']
|
|
|
@@ -841,16 +838,6 @@ class Resize(BaseOperator):
|
|
|
[im_scale_x, im_scale_y],
|
|
|
[resize_w, resize_h])
|
|
|
|
|
|
- # apply rbox
|
|
|
- if 'gt_rbox2poly' in sample:
|
|
|
- if np.array(sample['gt_rbox2poly']).shape[1] != 8:
|
|
|
- logger.warning(
|
|
|
- "gt_rbox2poly's length shoule be 8, but actually is {}".
|
|
|
- format(len(sample['gt_rbox2poly'])))
|
|
|
- sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
|
|
|
- [im_scale_x, im_scale_y],
|
|
|
- [resize_w, resize_h])
|
|
|
-
|
|
|
# apply polygon
|
|
|
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
|
|
|
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
|
|
|
@@ -1054,7 +1041,7 @@ class CropWithSampling(BaseOperator):
|
|
|
[max sample, max trial, min scale, max scale,
|
|
|
min aspect ratio, max aspect ratio,
|
|
|
min overlap, max overlap]
|
|
|
- avoid_no_bbox (bool): whether to to avoid the
|
|
|
+ avoid_no_bbox (bool): whether to avoid the
|
|
|
situation where the box does not appear.
|
|
|
"""
|
|
|
super(CropWithSampling, self).__init__()
|
|
|
@@ -1145,7 +1132,7 @@ class CropWithDataAchorSampling(BaseOperator):
|
|
|
das_anchor_scales (list[float]): a list of anchor scales in data
|
|
|
anchor smapling.
|
|
|
min_size (float): minimum size of sampled bbox.
|
|
|
- avoid_no_bbox (bool): whether to to avoid the
|
|
|
+ avoid_no_bbox (bool): whether to avoid the
|
|
|
situation where the box does not appear.
|
|
|
"""
|
|
|
super(CropWithDataAchorSampling, self).__init__()
|
|
|
@@ -1504,6 +1491,11 @@ class RandomCrop(BaseOperator):
|
|
|
if 'is_crowd' in sample:
|
|
|
sample['is_crowd'] = np.take(
|
|
|
sample['is_crowd'], valid_ids, axis=0)
|
|
|
+
|
|
|
+ if 'difficult' in sample:
|
|
|
+ sample['difficult'] = np.take(
|
|
|
+ sample['difficult'], valid_ids, axis=0)
|
|
|
+
|
|
|
return sample
|
|
|
|
|
|
return sample
|
|
|
@@ -1604,7 +1596,7 @@ class RandomScaledCrop(BaseOperator):
|
|
|
@register_op
|
|
|
class Cutmix(BaseOperator):
|
|
|
def __init__(self, alpha=1.5, beta=1.5):
|
|
|
- """
|
|
|
+ """
|
|
|
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
|
|
|
Cutmix image and gt_bbbox/gt_score
|
|
|
Args:
|
|
|
@@ -1747,7 +1739,7 @@ class Mixup(BaseOperator):
|
|
|
gt_score2 = np.ones_like(sample[1]['gt_class'])
|
|
|
gt_score = np.concatenate(
|
|
|
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
|
|
|
- result['gt_score'] = gt_score
|
|
|
+ result['gt_score'] = gt_score.astype('float32')
|
|
|
if 'is_crowd' in sample[0]:
|
|
|
is_crowd1 = sample[0]['is_crowd']
|
|
|
is_crowd2 = sample[1]['is_crowd']
|
|
|
@@ -2029,13 +2021,14 @@ class Pad(BaseOperator):
|
|
|
if self.size:
|
|
|
h, w = self.size
|
|
|
assert (
|
|
|
- im_h < h and im_w < w
|
|
|
+ im_h <= h and im_w <= w
|
|
|
), '(h, w) of target size should be greater than (im_h, im_w)'
|
|
|
else:
|
|
|
- h = np.ceil(im_h / self.size_divisor) * self.size_divisor
|
|
|
- w = np.ceil(im_w / self.size_divisor) * self.size_divisor
|
|
|
+ h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
|
|
|
+ w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
|
|
|
|
|
|
if h == im_h and w == im_w:
|
|
|
+ sample['image'] = im.astype(np.float32)
|
|
|
return sample
|
|
|
|
|
|
if self.pad_mode == -1:
|
|
|
@@ -2106,44 +2099,30 @@ class Poly2Mask(BaseOperator):
|
|
|
|
|
|
|
|
|
@register_op
|
|
|
-class Rbox2Poly(BaseOperator):
|
|
|
- """
|
|
|
- Convert rbbox format to poly format.
|
|
|
+class AugmentHSV(BaseOperator):
|
|
|
+ """
|
|
|
+ Augment the S and V channels of image data (and the H channel when hgain/sgain/vgain are given).
|
|
|
+ Args:
|
|
|
+ fraction (float): the fraction for augment. Default: 0.5.
|
|
|
+ is_bgr (bool): whether the image is BGR mode. Default: True.
|
|
|
+ hgain (float): H channel gains
|
|
|
+ sgain (float): S channel gains
|
|
|
+ vgain (float): V channel gains
|
|
|
"""
|
|
|
|
|
|
- def __init__(self):
|
|
|
- super(Rbox2Poly, self).__init__()
|
|
|
-
|
|
|
- def apply(self, sample, context=None):
|
|
|
- assert 'gt_rbox' in sample
|
|
|
- assert sample['gt_rbox'].shape[1] == 5
|
|
|
- rrects = sample['gt_rbox']
|
|
|
- x_ctr = rrects[:, 0]
|
|
|
- y_ctr = rrects[:, 1]
|
|
|
- width = rrects[:, 2]
|
|
|
- height = rrects[:, 3]
|
|
|
- x1 = x_ctr - width / 2.0
|
|
|
- y1 = y_ctr - height / 2.0
|
|
|
- x2 = x_ctr + width / 2.0
|
|
|
- y2 = y_ctr + height / 2.0
|
|
|
- sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
|
|
|
- polys = bbox_utils.rbox2poly_np(rrects)
|
|
|
- sample['gt_rbox2poly'] = polys
|
|
|
- return sample
|
|
|
-
|
|
|
-
|
|
|
-@register_op
|
|
|
-class AugmentHSV(BaseOperator):
|
|
|
- def __init__(self, fraction=0.50, is_bgr=True):
|
|
|
- """
|
|
|
- Augment the SV channel of image data.
|
|
|
- Args:
|
|
|
- fraction (float): the fraction for augment. Default: 0.5.
|
|
|
- is_bgr (bool): whether the image is BGR mode. Default: True.
|
|
|
- """
|
|
|
+ def __init__(self,
|
|
|
+ fraction=0.50,
|
|
|
+ is_bgr=True,
|
|
|
+ hgain=None,
|
|
|
+ sgain=None,
|
|
|
+ vgain=None):
|
|
|
super(AugmentHSV, self).__init__()
|
|
|
self.fraction = fraction
|
|
|
self.is_bgr = is_bgr
|
|
|
+ self.hgain = hgain
|
|
|
+ self.sgain = sgain
|
|
|
+ self.vgain = vgain
|
|
|
+ self.use_hsvgain = False if hgain is None else True
|
|
|
|
|
|
def apply(self, sample, context=None):
|
|
|
img = sample['image']
|
|
|
@@ -2151,27 +2130,39 @@ class AugmentHSV(BaseOperator):
|
|
|
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
|
|
|
else:
|
|
|
img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
|
|
|
- S = img_hsv[:, :, 1].astype(np.float32)
|
|
|
- V = img_hsv[:, :, 2].astype(np.float32)
|
|
|
|
|
|
- a = (random.random() * 2 - 1) * self.fraction + 1
|
|
|
- S *= a
|
|
|
- if a > 1:
|
|
|
- np.clip(S, a_min=0, a_max=255, out=S)
|
|
|
+ if self.use_hsvgain:
|
|
|
+ hsv_augs = np.random.uniform(
|
|
|
+ -1, 1, 3) * [self.hgain, self.sgain, self.vgain]
|
|
|
+ # random selection of h, s, v
|
|
|
+ hsv_augs *= np.random.randint(0, 2, 3)
|
|
|
+ img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
|
|
|
+ img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
|
|
|
+ img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
|
|
|
+
|
|
|
+ else:
|
|
|
+ S = img_hsv[:, :, 1].astype(np.float32)
|
|
|
+ V = img_hsv[:, :, 2].astype(np.float32)
|
|
|
+
|
|
|
+ a = (random.random() * 2 - 1) * self.fraction + 1
|
|
|
+ S *= a
|
|
|
+ if a > 1:
|
|
|
+ np.clip(S, a_min=0, a_max=255, out=S)
|
|
|
+
|
|
|
+ a = (random.random() * 2 - 1) * self.fraction + 1
|
|
|
+ V *= a
|
|
|
+ if a > 1:
|
|
|
+ np.clip(V, a_min=0, a_max=255, out=V)
|
|
|
|
|
|
- a = (random.random() * 2 - 1) * self.fraction + 1
|
|
|
- V *= a
|
|
|
- if a > 1:
|
|
|
- np.clip(V, a_min=0, a_max=255, out=V)
|
|
|
+ img_hsv[:, :, 1] = S.astype(np.uint8)
|
|
|
+ img_hsv[:, :, 2] = V.astype(np.uint8)
|
|
|
|
|
|
- img_hsv[:, :, 1] = S.astype(np.uint8)
|
|
|
- img_hsv[:, :, 2] = V.astype(np.uint8)
|
|
|
if self.is_bgr:
|
|
|
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
|
|
|
else:
|
|
|
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)
|
|
|
|
|
|
- sample['image'] = img
|
|
|
+ sample['image'] = img.astype(np.float32)
|
|
|
return sample
|
|
|
|
|
|
|
|
|
@@ -2223,7 +2214,7 @@ class RandomResizeCrop(BaseOperator):
|
|
|
'long', resize the image's long side to the maximum of target_size, if keep_ratio is
|
|
|
True and mode is 'short', resize the image's short side to the minimum of target_size.
|
|
|
cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
|
|
|
- mode (str): resize mode, `long` or `short`. Details see resizes.
|
|
|
+ mode (str): resize mode, `long` or `short`. Details see resizes.
|
|
|
prob (float): probability of this op.
|
|
|
keep_ratio (bool): whether keep_ratio or not, default true
|
|
|
interp (int): the interpolation method
|
|
|
@@ -2425,16 +2416,6 @@ class RandomResizeCrop(BaseOperator):
|
|
|
[im_scale_x, im_scale_y],
|
|
|
[resize_w, resize_h])
|
|
|
|
|
|
- # apply rbox
|
|
|
- if 'gt_rbox2poly' in sample:
|
|
|
- if np.array(sample['gt_rbox2poly']).shape[1] != 8:
|
|
|
- logger.warn(
|
|
|
- "gt_rbox2poly's length shoule be 8, but actually is {}".
|
|
|
- format(len(sample['gt_rbox2poly'])))
|
|
|
- sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
|
|
|
- [im_scale_x, im_scale_y],
|
|
|
- [resize_w, resize_h])
|
|
|
-
|
|
|
# apply polygon
|
|
|
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
|
|
|
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
|
|
|
@@ -2892,7 +2873,7 @@ class FlipWarpAffine(BaseOperator):
|
|
|
"""FlipWarpAffine
|
|
|
1. Random Crop
|
|
|
2. Flip the image horizontal
|
|
|
- 3. Warp affine the image
|
|
|
+ 3. Warp affine the image
|
|
|
"""
|
|
|
super(FlipWarpAffine, self).__init__()
|
|
|
self.keep_res = keep_res
|
|
|
@@ -3013,3 +2994,409 @@ class CenterRandColor(BaseOperator):
|
|
|
img = func(img, img_gray)
|
|
|
sample['image'] = img
|
|
|
return sample
|
|
|
+
|
|
|
+
|
|
|
+@register_op
|
|
|
+class Mosaic(BaseOperator):
|
|
|
+ """ Mosaic operator for image and gt_bboxes
|
|
|
+ The code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/mosaicdetection.py
|
|
|
+
|
|
|
+ 1. get mosaic coords
|
|
|
+ 2. clip bbox and get mosaic_labels
|
|
|
+ 3. random_affine augment
|
|
|
+ 4. Mixup augment as copypaste (optional), not used in tiny/nano
|
|
|
+
|
|
|
+ Args:
|
|
|
+ prob (float): probability of using Mosaic, 1.0 as default
|
|
|
+ input_dim (list[int]): input shape
|
|
|
+ degrees (list[2]): the rotate range to apply, transform range is [min, max]
|
|
|
+ translate (list[2]): the translate range to apply, transform range is [min, max]
|
|
|
+ scale (list[2]): the scale range to apply, transform range is [min, max]
|
|
|
+ shear (list[2]): the shear range to apply, transform range is [min, max]
|
|
|
+ enable_mixup (bool): whether to enable Mixup or not
|
|
|
+ mixup_prob (float): probability of using Mixup, 1.0 as default
|
|
|
+ mixup_scale (list[float]): scale range of Mixup
|
|
|
+ remove_outside_box (bool): whether remove outside boxes, False as
|
|
|
+ default in COCO dataset, True in MOT dataset
|
|
|
+ """
|
|
|
+
|
|
|
+ def __init__(self,
|
|
|
+ prob=1.0,
|
|
|
+ input_dim=[640, 640],
|
|
|
+ degrees=[-10, 10],
|
|
|
+ translate=[-0.1, 0.1],
|
|
|
+ scale=[0.1, 2],
|
|
|
+ shear=[-2, 2],
|
|
|
+ enable_mixup=True,
|
|
|
+ mixup_prob=1.0,
|
|
|
+ mixup_scale=[0.5, 1.5],
|
|
|
+ remove_outside_box=False):
|
|
|
+ super(Mosaic, self).__init__()
|
|
|
+ self.prob = prob
|
|
|
+ if isinstance(input_dim, Integral):
|
|
|
+ input_dim = [input_dim, input_dim]
|
|
|
+ self.input_dim = input_dim
|
|
|
+ self.degrees = degrees
|
|
|
+ self.translate = translate
|
|
|
+ self.scale = scale
|
|
|
+ self.shear = shear
|
|
|
+ self.enable_mixup = enable_mixup
|
|
|
+ self.mixup_prob = mixup_prob
|
|
|
+ self.mixup_scale = mixup_scale
|
|
|
+ self.remove_outside_box = remove_outside_box
|
|
|
+
|
|
|
+ def get_mosaic_coords(self, mosaic_idx, xc, yc, w, h, input_h, input_w):
|
|
|
+ # (x1, y1, x2, y2) means coords in large image,
|
|
|
+ # small_coords means coords in small image in mosaic aug.
|
|
|
+ if mosaic_idx == 0:
|
|
|
+ # top left
|
|
|
+ x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
|
|
|
+ small_coords = w - (x2 - x1), h - (y2 - y1), w, h
|
|
|
+ elif mosaic_idx == 1:
|
|
|
+ # top right
|
|
|
+ x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
|
|
|
+ small_coords = 0, h - (y2 - y1), min(w, x2 - x1), h
|
|
|
+ elif mosaic_idx == 2:
|
|
|
+ # bottom left
|
|
|
+ x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
|
|
|
+ small_coords = w - (x2 - x1), 0, w, min(y2 - y1, h)
|
|
|
+ elif mosaic_idx == 3:
|
|
|
+ # bottom right
|
|
|
+ x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2,
|
|
|
+ yc + h)
|
|
|
+ small_coords = 0, 0, min(w, x2 - x1), min(y2 - y1, h)
|
|
|
+
|
|
|
+ return (x1, y1, x2, y2), small_coords
|
|
|
+
|
|
|
+ def random_affine_augment(self,
|
|
|
+ img,
|
|
|
+ labels=[],
|
|
|
+ input_dim=[640, 640],
|
|
|
+ degrees=[-10, 10],
|
|
|
+ scales=[0.1, 2],
|
|
|
+ shears=[-2, 2],
|
|
|
+ translates=[-0.1, 0.1]):
|
|
|
+ # random rotation and scale
|
|
|
+ degree = random.uniform(degrees[0], degrees[1])
|
|
|
+ scale = random.uniform(scales[0], scales[1])
|
|
|
+ assert scale > 0, "Argument scale should be positive."
|
|
|
+ R = cv2.getRotationMatrix2D(angle=degree, center=(0, 0), scale=scale)
|
|
|
+ M = np.ones([2, 3])
|
|
|
+
|
|
|
+ # random shear
|
|
|
+ shear = random.uniform(shears[0], shears[1])
|
|
|
+ shear_x = math.tan(shear * math.pi / 180)
|
|
|
+ shear_y = math.tan(shear * math.pi / 180)
|
|
|
+ M[0] = R[0] + shear_y * R[1]
|
|
|
+ M[1] = R[1] + shear_x * R[0]
|
|
|
+
|
|
|
+ # random translation
|
|
|
+ translate = random.uniform(translates[0], translates[1])
|
|
|
+ translation_x = translate * input_dim[0]
|
|
|
+ translation_y = translate * input_dim[1]
|
|
|
+ M[0, 2] = translation_x
|
|
|
+ M[1, 2] = translation_y
|
|
|
+
|
|
|
+ # warpAffine
|
|
|
+ img = cv2.warpAffine(
|
|
|
+ img, M, dsize=tuple(input_dim), borderValue=(114, 114, 114))
|
|
|
+
|
|
|
+ num_gts = len(labels)
|
|
|
+ if num_gts > 0:
|
|
|
+ # warp corner points
|
|
|
+ corner_points = np.ones((4 * num_gts, 3))
|
|
|
+ corner_points[:, :2] = labels[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
|
|
|
+ 4 * num_gts, 2) # x1y1, x2y2, x1y2, x2y1
|
|
|
+ # apply affine transform
|
|
|
+ corner_points = corner_points @M.T
|
|
|
+ corner_points = corner_points.reshape(num_gts, 8)
|
|
|
+
|
|
|
+ # create new boxes
|
|
|
+ corner_xs = corner_points[:, 0::2]
|
|
|
+ corner_ys = corner_points[:, 1::2]
|
|
|
+ new_bboxes = np.concatenate((corner_xs.min(1), corner_ys.min(1),
|
|
|
+ corner_xs.max(1), corner_ys.max(1)))
|
|
|
+ new_bboxes = new_bboxes.reshape(4, num_gts).T
|
|
|
+
|
|
|
+ # clip boxes
|
|
|
+ new_bboxes[:, 0::2] = np.clip(new_bboxes[:, 0::2], 0, input_dim[0])
|
|
|
+ new_bboxes[:, 1::2] = np.clip(new_bboxes[:, 1::2], 0, input_dim[1])
|
|
|
+ labels[:, :4] = new_bboxes
|
|
|
+
|
|
|
+ return img, labels
|
|
|
+
|
|
|
+ def __call__(self, sample, context=None):
|
|
|
+ if not isinstance(sample, Sequence):
|
|
|
+ return sample
|
|
|
+
|
|
|
+ assert len(
|
|
|
+ sample) == 5, "Mosaic needs 5 samples, 4 for mosaic and 1 for mixup."
|
|
|
+ if np.random.uniform(0., 1.) > self.prob:
|
|
|
+ return sample[0]
|
|
|
+
|
|
|
+ mosaic_gt_bbox, mosaic_gt_class, mosaic_is_crowd, mosaic_difficult = [], [], [], []
|
|
|
+ input_h, input_w = self.input_dim
|
|
|
+ yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
|
|
|
+ xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
|
|
|
+ mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)
|
|
|
+
|
|
|
+ # 1. get mosaic coords
|
|
|
+ for mosaic_idx, sp in enumerate(sample[:4]):
|
|
|
+ img = sp['image']
|
|
|
+ gt_bbox = sp['gt_bbox']
|
|
|
+ h0, w0 = img.shape[:2]
|
|
|
+ scale = min(1. * input_h / h0, 1. * input_w / w0)
|
|
|
+ img = cv2.resize(
|
|
|
+ img, (int(w0 * scale), int(h0 * scale)),
|
|
|
+ interpolation=cv2.INTER_LINEAR)
|
|
|
+ (h, w, c) = img.shape[:3]
|
|
|
+
|
|
|
+ # suffix l means large image, while s means small image in mosaic aug.
|
|
|
+ (l_x1, l_y1, l_x2, l_y2), (
|
|
|
+ s_x1, s_y1, s_x2, s_y2) = self.get_mosaic_coords(
|
|
|
+ mosaic_idx, xc, yc, w, h, input_h, input_w)
|
|
|
+
|
|
|
+ mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
|
|
|
+ padw, padh = l_x1 - s_x1, l_y1 - s_y1
|
|
|
+
|
|
|
+ # Scale xyxy boxes by the resize ratio and shift them by the mosaic padding
|
|
|
+ _gt_bbox = gt_bbox.copy()
|
|
|
+ if len(gt_bbox) > 0:
|
|
|
+ _gt_bbox[:, 0] = scale * gt_bbox[:, 0] + padw
|
|
|
+ _gt_bbox[:, 1] = scale * gt_bbox[:, 1] + padh
|
|
|
+ _gt_bbox[:, 2] = scale * gt_bbox[:, 2] + padw
|
|
|
+ _gt_bbox[:, 3] = scale * gt_bbox[:, 3] + padh
|
|
|
+
|
|
|
+ mosaic_gt_bbox.append(_gt_bbox)
|
|
|
+ mosaic_gt_class.append(sp['gt_class'])
|
|
|
+ if 'is_crowd' in sp:
|
|
|
+ mosaic_is_crowd.append(sp['is_crowd'])
|
|
|
+ if 'difficult' in sp:
|
|
|
+ mosaic_difficult.append(sp['difficult'])
|
|
|
+
|
|
|
+ # 2. clip bbox and get mosaic_labels([gt_bbox, gt_class, is_crowd])
|
|
|
+ if len(mosaic_gt_bbox):
|
|
|
+ mosaic_gt_bbox = np.concatenate(mosaic_gt_bbox, 0)
|
|
|
+ mosaic_gt_class = np.concatenate(mosaic_gt_class, 0)
|
|
|
+ if mosaic_is_crowd:
|
|
|
+ mosaic_is_crowd = np.concatenate(mosaic_is_crowd, 0)
|
|
|
+ mosaic_labels = np.concatenate([
|
|
|
+ mosaic_gt_bbox,
|
|
|
+ mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
|
|
|
+ mosaic_is_crowd.astype(mosaic_gt_bbox.dtype)
|
|
|
+ ], 1)
|
|
|
+ elif mosaic_difficult:
|
|
|
+ mosaic_difficult = np.concatenate(mosaic_difficult, 0)
|
|
|
+ mosaic_labels = np.concatenate([
|
|
|
+ mosaic_gt_bbox,
|
|
|
+ mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
|
|
|
+ mosaic_difficult.astype(mosaic_gt_bbox.dtype)
|
|
|
+ ], 1)
|
|
|
+ else:
|
|
|
+ mosaic_labels = np.concatenate([
|
|
|
+ mosaic_gt_bbox, mosaic_gt_class.astype(mosaic_gt_bbox.dtype)
|
|
|
+ ], 1)
|
|
|
+ if self.remove_outside_box:
|
|
|
+ # for MOT dataset
|
|
|
+ flag1 = mosaic_gt_bbox[:, 0] < 2 * input_w
|
|
|
+ flag2 = mosaic_gt_bbox[:, 2] > 0
|
|
|
+ flag3 = mosaic_gt_bbox[:, 1] < 2 * input_h
|
|
|
+ flag4 = mosaic_gt_bbox[:, 3] > 0
|
|
|
+ flag_all = flag1 * flag2 * flag3 * flag4
|
|
|
+ mosaic_labels = mosaic_labels[flag_all]
|
|
|
+ else:
|
|
|
+ mosaic_labels[:, 0] = np.clip(mosaic_labels[:, 0], 0,
|
|
|
+ 2 * input_w)
|
|
|
+ mosaic_labels[:, 1] = np.clip(mosaic_labels[:, 1], 0,
|
|
|
+ 2 * input_h)
|
|
|
+ mosaic_labels[:, 2] = np.clip(mosaic_labels[:, 2], 0,
|
|
|
+ 2 * input_w)
|
|
|
+ mosaic_labels[:, 3] = np.clip(mosaic_labels[:, 3], 0,
|
|
|
+ 2 * input_h)
|
|
|
+ else:
|
|
|
+ mosaic_labels = np.zeros((1, 6))
|
|
|
+
|
|
|
+ # 3. random_affine augment
|
|
|
+ mosaic_img, mosaic_labels = self.random_affine_augment(
|
|
|
+ mosaic_img,
|
|
|
+ mosaic_labels,
|
|
|
+ input_dim=self.input_dim,
|
|
|
+ degrees=self.degrees,
|
|
|
+ translates=self.translate,
|
|
|
+ scales=self.scale,
|
|
|
+ shears=self.shear)
|
|
|
+
|
|
|
+ # 4. Mixup augment as copypaste, https://arxiv.org/abs/2012.07177
|
|
|
+ # optional, not used(enable_mixup=False) in tiny/nano
|
|
|
+ if (self.enable_mixup and not len(mosaic_labels) == 0 and
|
|
|
+ random.random() < self.mixup_prob):
|
|
|
+ sample_mixup = sample[4]
|
|
|
+ mixup_img = sample_mixup['image']
|
|
|
+ if 'is_crowd' in sample_mixup:
|
|
|
+ cp_labels = np.concatenate([
|
|
|
+ sample_mixup['gt_bbox'],
|
|
|
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype),
|
|
|
+ sample_mixup['is_crowd'].astype(mosaic_labels.dtype)
|
|
|
+ ], 1)
|
|
|
+ elif 'difficult' in sample_mixup:
|
|
|
+ cp_labels = np.concatenate([
|
|
|
+ sample_mixup['gt_bbox'],
|
|
|
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype),
|
|
|
+ sample_mixup['difficult'].astype(mosaic_labels.dtype)
|
|
|
+ ], 1)
|
|
|
+ else:
|
|
|
+ cp_labels = np.concatenate([
|
|
|
+ sample_mixup['gt_bbox'],
|
|
|
+ sample_mixup['gt_class'].astype(mosaic_labels.dtype)
|
|
|
+ ], 1)
|
|
|
+ mosaic_img, mosaic_labels = self.mixup_augment(
|
|
|
+ mosaic_img, mosaic_labels, self.input_dim, cp_labels, mixup_img)
|
|
|
+
|
|
|
+ sample0 = sample[0]
|
|
|
+ sample0['image'] = mosaic_img.astype(np.uint8) # can not be float32
|
|
|
+ sample0['h'] = float(mosaic_img.shape[0])
|
|
|
+ sample0['w'] = float(mosaic_img.shape[1])
|
|
|
+ sample0['im_shape'][0] = sample0['h']
|
|
|
+ sample0['im_shape'][1] = sample0['w']
|
|
|
+ sample0['gt_bbox'] = mosaic_labels[:, :4].astype(np.float32)
|
|
|
+ sample0['gt_class'] = mosaic_labels[:, 4:5].astype(np.float32)
|
|
|
+ if 'is_crowd' in sample[0]:
|
|
|
+ sample0['is_crowd'] = mosaic_labels[:, 5:6].astype(np.float32)
|
|
|
+ if 'difficult' in sample[0]:
|
|
|
+ sample0['difficult'] = mosaic_labels[:, 5:6].astype(np.float32)
|
|
|
+ return sample0
|
|
|
+
|
|
|
+ def mixup_augment(self, origin_img, origin_labels, input_dim, cp_labels,
|
|
|
+ img):
|
|
|
+ jit_factor = random.uniform(*self.mixup_scale)
|
|
|
+ FLIP = random.uniform(0, 1) > 0.5
|
|
|
+ if len(img.shape) == 3:
|
|
|
+ cp_img = np.ones(
|
|
|
+ (input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
|
|
|
+ else:
|
|
|
+ cp_img = np.ones(input_dim, dtype=np.uint8) * 114
|
|
|
+
|
|
|
+ cp_scale_ratio = min(input_dim[0] / img.shape[0],
|
|
|
+ input_dim[1] / img.shape[1])
|
|
|
+ resized_img = cv2.resize(
|
|
|
+ img, (int(img.shape[1] * cp_scale_ratio),
|
|
|
+ int(img.shape[0] * cp_scale_ratio)),
|
|
|
+ interpolation=cv2.INTER_LINEAR)
|
|
|
+
|
|
|
+ cp_img[:int(img.shape[0] * cp_scale_ratio), :int(img.shape[
|
|
|
+ 1] * cp_scale_ratio)] = resized_img
|
|
|
+
|
|
|
+ cp_img = cv2.resize(cp_img, (int(cp_img.shape[1] * jit_factor),
|
|
|
+ int(cp_img.shape[0] * jit_factor)))
|
|
|
+ cp_scale_ratio *= jit_factor
|
|
|
+
|
|
|
+ if FLIP:
|
|
|
+ cp_img = cp_img[:, ::-1, :]
|
|
|
+
|
|
|
+ origin_h, origin_w = cp_img.shape[:2]
|
|
|
+ target_h, target_w = origin_img.shape[:2]
|
|
|
+ padded_img = np.zeros(
|
|
|
+ (max(origin_h, target_h), max(origin_w, target_w), 3),
|
|
|
+ dtype=np.uint8)
|
|
|
+ padded_img[:origin_h, :origin_w] = cp_img
|
|
|
+
|
|
|
+ x_offset, y_offset = 0, 0
|
|
|
+ if padded_img.shape[0] > target_h:
|
|
|
+ y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
|
|
|
+ if padded_img.shape[1] > target_w:
|
|
|
+ x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
|
|
|
+ padded_cropped_img = padded_img[y_offset:y_offset + target_h, x_offset:
|
|
|
+ x_offset + target_w]
|
|
|
+
|
|
|
+ # adjust boxes
|
|
|
+ cp_bboxes_origin_np = cp_labels[:, :4].copy()
|
|
|
+ cp_bboxes_origin_np[:, 0::2] = np.clip(cp_bboxes_origin_np[:, 0::2] *
|
|
|
+ cp_scale_ratio, 0, origin_w)
|
|
|
+ cp_bboxes_origin_np[:, 1::2] = np.clip(cp_bboxes_origin_np[:, 1::2] *
|
|
|
+ cp_scale_ratio, 0, origin_h)
|
|
|
+
|
|
|
+ if FLIP:
|
|
|
+ cp_bboxes_origin_np[:, 0::2] = (
|
|
|
+ origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1])
|
|
|
+ cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
|
|
|
+ if self.remove_outside_box:
|
|
|
+ # for MOT dataset
|
|
|
+ cp_bboxes_transformed_np[:, 0::2] -= x_offset
|
|
|
+ cp_bboxes_transformed_np[:, 1::2] -= y_offset
|
|
|
+ else:
|
|
|
+ cp_bboxes_transformed_np[:, 0::2] = np.clip(
|
|
|
+ cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
|
|
|
+ cp_bboxes_transformed_np[:, 1::2] = np.clip(
|
|
|
+ cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)
|
|
|
+
|
|
|
+ cls_labels = cp_labels[:, 4:5].copy()
|
|
|
+ box_labels = cp_bboxes_transformed_np
|
|
|
+ if cp_labels.shape[-1] == 6:
|
|
|
+ crd_labels = cp_labels[:, 5:6].copy()
|
|
|
+ labels = np.hstack((box_labels, cls_labels, crd_labels))
|
|
|
+ else:
|
|
|
+ labels = np.hstack((box_labels, cls_labels))
|
|
|
+ if self.remove_outside_box:
|
|
|
+ labels = labels[labels[:, 0] < target_w]
|
|
|
+ labels = labels[labels[:, 2] > 0]
|
|
|
+ labels = labels[labels[:, 1] < target_h]
|
|
|
+ labels = labels[labels[:, 3] > 0]
|
|
|
+
|
|
|
+ origin_labels = np.vstack((origin_labels, labels))
|
|
|
+ origin_img = origin_img.astype(np.float32)
|
|
|
+ origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(
|
|
|
+ np.float32)
|
|
|
+
|
|
|
+ return origin_img.astype(np.uint8), origin_labels
|
|
|
+
|
|
|
+
|
|
|
+@register_op
|
|
|
+class PadResize(BaseOperator):
|
|
|
+ """ PadResize for image and gt_bbox
|
|
|
+
|
|
|
+ Args:
|
|
|
+ target_size (list[int]): input shape
|
|
|
+ fill_value (float): pixel value of padded image
|
|
|
+ """
|
|
|
+
|
|
|
+ def __init__(self, target_size, fill_value=114):
|
|
|
+ super(PadResize, self).__init__()
|
|
|
+ if isinstance(target_size, Integral):
|
|
|
+ target_size = [target_size, target_size]
|
|
|
+ self.target_size = target_size
|
|
|
+ self.fill_value = fill_value
|
|
|
+
|
|
|
+ def _resize(self, img, bboxes, labels):
|
|
|
+ ratio = min(self.target_size[0] / img.shape[0],
|
|
|
+ self.target_size[1] / img.shape[1])
|
|
|
+ w, h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
|
|
|
+ resized_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
|
|
|
+
|
|
|
+ if len(bboxes) > 0:
|
|
|
+ bboxes *= ratio
|
|
|
+ mask = np.minimum(bboxes[:, 2] - bboxes[:, 0],
|
|
|
+ bboxes[:, 3] - bboxes[:, 1]) > 1
|
|
|
+ bboxes = bboxes[mask]
|
|
|
+ labels = labels[mask]
|
|
|
+ return resized_img, bboxes, labels
|
|
|
+
|
|
|
+ def _pad(self, img):
|
|
|
+ h, w, _ = img.shape
|
|
|
+ if h == self.target_size[0] and w == self.target_size[1]:
|
|
|
+ return img
|
|
|
+ padded_img = np.full(
|
|
|
+ (self.target_size[0], self.target_size[1], 3),
|
|
|
+ self.fill_value,
|
|
|
+ dtype=np.uint8)
|
|
|
+ padded_img[:h, :w] = img
|
|
|
+ return padded_img
|
|
|
+
|
|
|
+ def apply(self, sample, context=None):
|
|
|
+ image = sample['image']
|
|
|
+ bboxes = sample['gt_bbox']
|
|
|
+ labels = sample['gt_class']
|
|
|
+ image, bboxes, labels = self._resize(image, bboxes, labels)
|
|
|
+ sample['image'] = self._pad(image).astype(np.float32)
|
|
|
+ sample['gt_bbox'] = bboxes
|
|
|
+ sample['gt_class'] = labels
|
|
|
+ return sample
|