@@ -36,19 +36,12 @@ logger = setup_logger(__name__)

registered_ops = []

__all__ = [
-    'RandomAffine',
-    'KeyPointFlip',
-    'TagGenerate',
-    'ToHeatmaps',
-    'NormalizePermute',
-    'EvalAffine',
-    'RandomFlipHalfBodyTransform',
-    'TopDownAffine',
-    'ToHeatmapsTopDown',
-    'ToHeatmapsTopDown_DARK',
-    'ToHeatmapsTopDown_UDP',
-    'TopDownEvalAffine',
-    'AugmentationbyInformantionDropping',
+    'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
+    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
+    'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK',
+    'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine',
+    'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter',
+    'FlipPose', 'PETR_Resize'
]
@@ -72,38 +65,77 @@ class KeyPointFlip(object):
    """

-    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
+    def __init__(self, flip_permutation, hmsize=None, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

-    def __call__(self, records):
-        image = records['image']
-        kpts_lst = records['joints']
-        mask_lst = records['mask']
-        flip = np.random.random() < self.flip_prob
-        if flip:
-            image = image[:, ::-1]
-            for idx, hmsize in enumerate(self.hmsize):
-                if len(mask_lst) > idx:
-                    mask_lst[idx] = mask_lst[idx][:, ::-1]
+    def _flipjoints(self, records, sizelst):
+        '''
+        records['gt_joints'] is a Sequence in higherhrnet
+        '''
+        if not ('gt_joints' in records and len(records['gt_joints']) > 0):
+            return records
+
+        kpts_lst = records['gt_joints']
+        if isinstance(kpts_lst, Sequence):
+            for idx, hmsize in enumerate(sizelst):
                if kpts_lst[idx].ndim == 3:
                    kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
                else:
                    kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
                kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
-                kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
-                kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
-                kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
-        records['image'] = image
-        records['joints'] = kpts_lst
+        else:
+            hmsize = sizelst[0]
+            if kpts_lst.ndim == 3:
+                kpts_lst = kpts_lst[:, self.flip_permutation]
+            else:
+                kpts_lst = kpts_lst[self.flip_permutation]
+            kpts_lst[..., 0] = hmsize - kpts_lst[..., 0]
+
+        records['gt_joints'] = kpts_lst
+        return records
+
+    def _flipmask(self, records, sizelst):
+        if 'mask' not in records:
+            return records
+
+        mask_lst = records['mask']
+        for idx, hmsize in enumerate(sizelst):
+            if len(mask_lst) > idx:
+                mask_lst[idx] = mask_lst[idx][:, ::-1]
        records['mask'] = mask_lst
        return records

+    def _flipbbox(self, records, sizelst):
+        if 'gt_bbox' not in records:
+            return records
+
+        bboxes = records['gt_bbox']
+        hmsize = sizelst[0]
+        bboxes[:, 0::2] = hmsize - bboxes[:, 0::2][:, ::-1]
+        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, hmsize)
+        records['gt_bbox'] = bboxes
+        return records
+
+    def __call__(self, records):
+        flip = np.random.random() < self.flip_prob
+        if flip:
+            image = records['image']
+            image = image[:, ::-1]
+            records['image'] = image
+            if self.hmsize is None:
+                sizelst = [image.shape[1]]
+            else:
+                sizelst = self.hmsize
+            self._flipjoints(records, sizelst)
+            self._flipmask(records, sizelst)
+            self._flipbbox(records, sizelst)
+
+        return records
+
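
The flip now works key-by-key, so a record only needs the fields it actually
carries. A minimal usage sketch of the new single-scale path (hmsize=None);
the import path is an assumption about where this file lives in
PaddleDetection:

import numpy as np
from ppdet.data.transform.keypoint_operators import KeyPointFlip  # assumed path

op = KeyPointFlip(flip_permutation=[1, 0], hmsize=None, flip_prob=1.0)
records = {
    'image': np.zeros((64, 128, 3), dtype=np.uint8),
    # shape (num_person, num_joints, 3): x, y, visibility
    'gt_joints': np.array([[[10., 20., 1.], [30., 40., 1.]]]),
}
out = op(records)
# hmsize=None mirrors around the image width (128): the two joints swap via
# flip_permutation, then x=30 becomes 98 and x=10 becomes 118.
print(out['gt_joints'][0, :, 0])  # [98. 118.]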
@register_keypointop
class RandomAffine(object):
@@ -115,7 +147,7 @@ class RandomAffine(object):
        max_scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max absolute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
-        trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
+        trainsize (list[2]): the standard size used to train; the 'scale_type' side of [h,w] will be resized to trainsize as the standard
        scale_type (str): the length of [h,w] used for trainsize, chosen between 'short' and 'long'
        records(dict): the dict containing the image, mask and coords
@@ -128,9 +160,10 @@ class RandomAffine(object):
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
-                 hmsize=[128, 256],
-                 trainsize=512,
-                 scale_type='short'):
+                 hmsize=None,
+                 trainsize=[512, 512],
+                 scale_type='short',
+                 boldervalue=[114, 114, 114]):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
@@ -139,8 +172,9 @@ class RandomAffine(object):
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type
+        self.boldervalue = boldervalue

-    def _get_affine_matrix(self, center, scale, res, rot=0):
+    def _get_affine_matrix_old(self, center, scale, res, rot=0):
        """Generate transformation matrix."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
@@ -166,21 +200,94 @@ class RandomAffine(object):
        t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

+    def _get_affine_matrix(self, center, scale, res, rot=0):
+        """Generate transformation matrix."""
+        w, h = scale
+        t = np.zeros((3, 3), dtype=np.float32)
+        t[0, 0] = float(res[0]) / w
+        t[1, 1] = float(res[1]) / h
+        t[0, 2] = res[0] * (-float(center[0]) / w + .5)
+        t[1, 2] = res[1] * (-float(center[1]) / h + .5)
+        t[2, 2] = 1
+        if rot != 0:
+            rot = -rot  # To match direction of rotation from cropping
+            rot_mat = np.zeros((3, 3), dtype=np.float32)
+            rot_rad = rot * np.pi / 180
+            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+            rot_mat[0, :2] = [cs, -sn]
+            rot_mat[1, :2] = [sn, cs]
+            rot_mat[2, 2] = 1
+            # Need to rotate around center
+            t_mat = np.eye(3)
+            t_mat[0, 2] = -res[0] / 2
+            t_mat[1, 2] = -res[1] / 2
+            t_inv = t_mat.copy()
+            t_inv[:2, 2] *= -1
+            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
+        return t
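
Without rotation the new matrix is a pure scale-and-shift that maps the crop
center onto the output center. A small numpy-only sanity check, re-deriving
the no-rotation matrix from the formulas above:

import numpy as np

center, (w, h), res = (320., 240.), (400., 300.), (128., 128.)
t = np.array([
    [res[0] / w, 0., res[0] * (-center[0] / w + .5)],
    [0., res[1] / h, res[1] * (-center[1] / h + .5)],
    [0., 0., 1.],
])
mapped = t @ np.array([center[0], center[1], 1.])
assert np.allclose(mapped, [res[0] / 2, res[1] / 2, 1.])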
+
+    def _affine_joints_mask(self,
+                            degree,
+                            center,
+                            roi_size,
+                            dsize,
+                            keypoints=None,
+                            heatmap_mask=None,
+                            gt_bbox=None):
+        kpts = None
+        mask = None
+        bbox = None
+        mask_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+                                                  degree)[:2]
+        if heatmap_mask is not None:
+            mask = cv2.warpAffine(heatmap_mask, mask_affine_mat, tuple(dsize))
+            mask = ((mask / 255) > 0.5).astype(np.float32)
+        if keypoints is not None:
+            kpts = copy.deepcopy(keypoints)
+            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
+                                                mask_affine_mat)
+            kpts[kpts[..., 0] > dsize[0], :] = 0
+            kpts[kpts[..., 1] > dsize[1], :] = 0
+            kpts[kpts[..., 0] < 0, :] = 0
+            kpts[kpts[..., 1] < 0, :] = 0
+        if gt_bbox is not None:
+            temp_bbox = gt_bbox[:, [0, 3, 2, 1]]
+            cat_bbox = np.concatenate((gt_bbox, temp_bbox), axis=-1)
+            gt_bbox_warped = warp_affine_joints(cat_bbox, mask_affine_mat)
+            bbox = np.zeros_like(gt_bbox)
+            bbox[:, 0] = gt_bbox_warped[:, 0::2].min(1).clip(0, dsize[0])
+            bbox[:, 2] = gt_bbox_warped[:, 0::2].max(1).clip(0, dsize[0])
+            bbox[:, 1] = gt_bbox_warped[:, 1::2].min(1).clip(0, dsize[1])
+            bbox[:, 3] = gt_bbox_warped[:, 1::2].max(1).clip(0, dsize[1])
+        return kpts, mask, bbox
    def __call__(self, records):
        image = records['image']
-        keypoints = records['joints']
-        heatmap_mask = records['mask']
+        shape = np.array(image.shape[:2][::-1])
+        keypoints = None
+        heatmap_mask = None
+        gt_bbox = None
+        if 'gt_joints' in records:
+            keypoints = records['gt_joints']
+
+        if 'mask' in records:
+            heatmap_mask = records['mask']
+            heatmap_mask *= 255
+
+        if 'gt_bbox' in records:
+            gt_bbox = records['gt_bbox']

        degree = (np.random.random() * 2 - 1) * self.max_degree
-        shape = np.array(image.shape[:2][::-1])
        center = np.array(shape) / 2

        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
-            scale = max(shape[0], shape[1]) / 1.0
+            scale = np.array([max(shape[0], shape[1]) / 1.0] * 2)
        elif self.scale_type == 'short':
-            scale = min(shape[0], shape[1]) / 1.0
+            scale = np.array([min(shape[0], shape[1]) / 1.0] * 2)
+        elif self.scale_type == 'wh':
+            scale = shape
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
@@ -188,44 +295,55 @@
        dy = int(0)
        if self.max_shift > 0:

-            dx = np.random.randint(-self.max_shift * roi_size,
-                                   self.max_shift * roi_size)
-            dy = np.random.randint(-self.max_shift * roi_size,
-                                   self.max_shift * roi_size)
+            dx = np.random.randint(-self.max_shift * roi_size[0],
+                                   self.max_shift * roi_size[0])
+            dy = np.random.randint(-self.max_shift * roi_size[1],
+                                   self.max_shift * roi_size[1])

        center += np.array([dx, dy])
        input_size = 2 * center
+        if self.trainsize != -1:
+            dsize = self.trainsize
+            imgshape = tuple(dsize)
+        else:
+            dsize = scale
+            imgshape = tuple(shape.tolist())

-        keypoints[..., :2] *= shape
-        heatmap_mask *= 255
-        kpts_lst = []
-        mask_lst = []
-
-        image_affine_mat = self._get_affine_matrix(
-            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
+        image_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+                                                   degree)[:2]
        image = cv2.warpAffine(
            image,
-            image_affine_mat, (self.trainsize, self.trainsize),
-            flags=cv2.INTER_LINEAR)
+            image_affine_mat,
+            imgshape,
+            flags=cv2.INTER_LINEAR,
+            borderValue=self.boldervalue)
+
+        if self.hmsize is None:
+            kpts, mask, gt_bbox = self._affine_joints_mask(
+                degree, center, roi_size, dsize, keypoints, heatmap_mask,
+                gt_bbox)
+            records['image'] = image
+            if kpts is not None:
+                records['gt_joints'] = kpts
+            if mask is not None:
+                records['mask'] = mask
+            if gt_bbox is not None:
+                records['gt_bbox'] = gt_bbox
+            return records
+
+        kpts_lst = []
+        mask_lst = []
        for hmsize in self.hmsize:
-            kpts = copy.deepcopy(keypoints)
-            mask_affine_mat = self._get_affine_matrix(
-                center, roi_size, (hmsize, hmsize), degree)[:2]
-            if heatmap_mask is not None:
-                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
-                                      (hmsize, hmsize))
-                mask = ((mask / 255) > 0.5).astype(np.float32)
-            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
-                                                mask_affine_mat)
-            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
-            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
-            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
-            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
+            kpts, mask, gt_bbox = self._affine_joints_mask(
+                degree, center, roi_size, [hmsize, hmsize], keypoints,
+                heatmap_mask, gt_bbox)
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image
-        records['joints'] = kpts_lst
-        records['mask'] = mask_lst
+
+        if 'gt_joints' in records:
+            records['gt_joints'] = kpts_lst
+        if 'mask' in records:
+            records['mask'] = mask_lst
+        if 'gt_bbox' in records:
+            records['gt_bbox'] = gt_bbox
        return records
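
A hedged sketch of the two output modes (the import path is again an
assumption): with hmsize=None everything is mapped once into trainsize
coordinates and written back under the same keys, while passing a list of
heatmap sizes keeps the old per-scale list behavior.

import numpy as np
from ppdet.data.transform.keypoint_operators import RandomAffine  # assumed path

records = {
    'image': np.zeros((480, 640, 3), dtype=np.uint8),
    'gt_joints': np.zeros((1, 17, 3), dtype=np.float32),
    'gt_bbox': np.array([[100., 80., 300., 200.]], dtype=np.float32),
}
# Single-output mode, using the new 'wh' scale_type:
op = RandomAffine(hmsize=None, trainsize=[512, 512], scale_type='wh')
out = op(records)
print(out['image'].shape, out['gt_bbox'].shape)  # (512, 512, 3) (1, 4)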
@@ -258,9 +376,10 @@ class EvalAffine(object):
        if mask is not None:
            mask = cv2.warpAffine(mask, trans, size_resized)
            records['mask'] = mask
-        if 'joints' in records:
-            del records['joints']
+        if 'gt_joints' in records:
+            del records['gt_joints']
        records['image'] = image_resized
+        records['scale_factor'] = self.size / min(h, w)
        return records
@@ -310,7 +429,7 @@ class TagGenerate(object):
        self.num_joints = num_joints

    def __call__(self, records):
-        kpts_lst = records['joints']
+        kpts_lst = records['gt_joints']
        kpts = kpts_lst[0]
        tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
        inds = np.where(kpts[..., 2] > 0)
@@ -322,7 +441,7 @@
        tagmap[p, j, 2] = visible[..., 0]  # x
        tagmap[p, j, 3] = 1
        records['tagmap'] = tagmap
-        del records['joints']
+        del records['gt_joints']
        return records
@@ -356,7 +475,7 @@ class ToHeatmaps(object):
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
-        kpts_lst = records['joints']
+        kpts_lst = records['gt_joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
@@ -477,7 +596,7 @@ class RandomFlipHalfBodyTransform(object):

    def __call__(self, records):
        image = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
@@ -500,7 +619,7 @@
                joints, joints_vis, image.shape[1], self.flip_pairs)
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
-        records['joints'] = joints
+        records['gt_joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
@@ -560,7 +679,7 @@ class AugmentationbyInformantionDropping(object):

    def __call__(self, records):
        img = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        if np.random.rand() < self.prob_cutout:
            img = self._cutout(img, joints, joints_vis)
@@ -588,7 +707,7 @@ class TopDownAffine(object):

    def __call__(self, records):
        image = records['image']
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        rot = records['rotate'] if "rotate" in records else 0
        if self.use_udp:
@@ -613,8 +732,171 @@ class TopDownAffine(object):
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        records['image'] = image
-        records['joints'] = joints
+        records['gt_joints'] = joints
+
+        return records
+
+
+@register_keypointop
+class SinglePoseAffine(object):
+    """apply affine transform to image and coords
+
+    Args:
+        trainsize (list): [w, h], the standard size used to train
+        use_udp (bool): whether to use Unbiased Data Processing.
+        records(dict): the dict containing the image and coords
+
+    Returns:
+        records (dict): contains the image and coords after the transform
+
+    """
+
+    def __init__(self,
+                 trainsize,
+                 rotate=[1.0, 30],
+                 scale=[1.0, 0.25],
+                 use_udp=False):
+        self.trainsize = trainsize
+        self.use_udp = use_udp
+        self.rot_prob = rotate[0]
+        self.rot_range = rotate[1]
+        self.scale_prob = scale[0]
+        self.scale_ratio = scale[1]
+
+    def __call__(self, records):
+        image = records['image']
+        if 'joints_2d' in records:
+            joints = records['joints_2d']
+            joints_vis = records[
+                'joints_vis'] if 'joints_vis' in records else np.ones(
+                    (len(joints), 1))
+        rot = 0
+        s = 1.
+        if np.random.random() < self.rot_prob:
+            rot = np.clip(np.random.randn() * self.rot_range,
+                          -self.rot_range * 2, self.rot_range * 2)
+        if np.random.random() < self.scale_prob:
+            s = np.clip(np.random.randn() * self.scale_ratio + 1,
+                        1 - self.scale_ratio, 1 + self.scale_ratio)
+
+        if self.use_udp:
+            trans = get_warp_matrix(
+                rot,
+                np.array(records['bbox_center']) * 2.0,
+                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
+                records['bbox_scale'] * 200.0 * s)
+            image = cv2.warpAffine(
+                image,
+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
+                flags=cv2.INTER_LINEAR)
+            if 'joints_2d' in records:
+                joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(),
+                                                    trans)
+        else:
+            trans = get_affine_transform(
+                np.array(records['bbox_center']),
+                records['bbox_scale'] * s * 200, rot, self.trainsize)
+            image = cv2.warpAffine(
+                image,
+                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
+                flags=cv2.INTER_LINEAR)
+            if 'joints_2d' in records:
+                for i in range(len(joints)):
+                    if joints_vis[i, 0] > 0.0:
+                        joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
+
+        if 'joints_3d' in records:
+            pose3d = records['joints_3d']
+            if rot != 0:
+                trans_3djoints = np.eye(3)
+                rot_rad = -rot * np.pi / 180
+                sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+                trans_3djoints[0, :2] = [cs, -sn]
+                trans_3djoints[1, :2] = [sn, cs]
+                pose3d[:, :3] = np.einsum('ij,kj->ki', trans_3djoints,
+                                          pose3d[:, :3])
+                records['joints_3d'] = pose3d
+
+        records['image'] = image
+        if 'joints_2d' in records:
+            records['joints_2d'] = joints
+
+        return records
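
For the top-down single-pose path the record must already carry a bbox_center
and a bbox_scale in the usual 200-pixel units of top-down pipelines. A hedged
usage sketch (import path assumed):

import numpy as np
from ppdet.data.transform.keypoint_operators import SinglePoseAffine  # assumed path

op = SinglePoseAffine(trainsize=[192, 256], use_udp=False)
records = {
    'image': np.zeros((480, 640, 3), dtype=np.uint8),
    'bbox_center': np.array([320., 240.]),
    'bbox_scale': np.array([1.0, 1.2]),
    'joints_2d': np.zeros((14, 2), dtype=np.float32),
}
out = op(records)
print(out['image'].shape)  # (256, 192, 3): trainsize is [w, h]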
+
+
+@register_keypointop
+class NoiseJitter(object):
+    """apply NoiseJitter to image
+
+    Args:
+        noise_factor (float): the noise factor ratio used to generate the jitter
+
+    Returns:
+        records (dict): contains the image after the transform
+
+    """
+
+    def __init__(self, noise_factor=0.4):
+        self.noise_factor = noise_factor
+
+    def __call__(self, records):
+        self.pn = np.random.uniform(1 - self.noise_factor,
+                                    1 + self.noise_factor, 3)
+        rgb_img = records['image']
+        rgb_img[:, :, 0] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 0] * self.pn[0]))
+        rgb_img[:, :, 1] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 1] * self.pn[1]))
+        rgb_img[:, :, 2] = np.minimum(
+            255.0, np.maximum(0.0, rgb_img[:, :, 2] * self.pn[2]))
+        records['image'] = rgb_img
+        return records
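
The jitter is a per-channel gain drawn from [1 - noise_factor,
1 + noise_factor] and clipped to [0, 255]; an equivalent vectorized form,
shown only to document the math:

import numpy as np

noise_factor = 0.4
img = np.random.randint(0, 256, size=(8, 8, 3)).astype(np.float32)
pn = np.random.uniform(1 - noise_factor, 1 + noise_factor, 3)
jittered = np.clip(img * pn, 0.0, 255.0)  # same result as the channel loop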
+
+
+@register_keypointop
+class FlipPose(object):
+    """randomly flip the image and the corresponding 2D/3D pose
+
+    Args:
+        flip_prob (float): the probability to apply the flip
+        img_res (int): the image width used to mirror the x coordinates
+        num_joints (int): the joint number, used to pick the flip permutation
+
+    Returns:
+        records (dict): contains the image and coords after the transform
+
+    """
+
+    def __init__(self, flip_prob=0.5, img_res=224, num_joints=14):
+        self.flip_prob = flip_prob
+        self.img_res = img_res
+        if num_joints == 24:
+            self.perm = [
+                5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17,
+                18, 19, 21, 20, 23, 22
+            ]
+        elif num_joints == 14:
+            self.perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
+        else:
+            raise ValueError(
+                "unsupported num_joints in FlipPose: {}".format(num_joints))
+
+    def __call__(self, records):
+
+        if np.random.random() < self.flip_prob:
+            img = records['image']
+            img = np.fliplr(img)
+
+            if 'joints_2d' in records:
+                joints_2d = records['joints_2d']
+                joints_2d = joints_2d[self.perm]
+                joints_2d[:, 0] = self.img_res - joints_2d[:, 0]
+                records['joints_2d'] = joints_2d
+
+            if 'joints_3d' in records:
+                joints_3d = records['joints_3d']
+                joints_3d = joints_3d[self.perm]
+                joints_3d[:, 0] = -joints_3d[:, 0]
+                records['joints_3d'] = joints_3d
+
+            records['image'] = img
        return records
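
Both permutation tables are their own inverse, i.e. flipping twice restores
the original joint order. A self-contained check of that property:

perm14 = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
perm24 = [
    5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19,
    21, 20, 23, 22
]
for perm in (perm14, perm24):
    assert [perm[i] for i in perm] == list(range(len(perm)))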
@@ -686,7 +968,7 @@ class ToHeatmapsTopDown(object):
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.
        """
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
@@ -729,7 +1011,7 @@
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']

        return records
@@ -754,7 +1036,7 @@ class ToHeatmapsTopDown_DARK(object):
        self.sigma = sigma

    def __call__(self, records):
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
@@ -787,7 +1069,7 @@
                    (x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
        records['target'] = target
        records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']

        return records
@@ -816,7 +1098,7 @@ class ToHeatmapsTopDown_UDP(object):
        self.sigma = sigma

    def __call__(self, records):
-        joints = records['joints']
+        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
@@ -861,6 +1143,471 @@ class ToHeatmapsTopDown_UDP(object):
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
-        del records['joints'], records['joints_vis']
+        del records['gt_joints'], records['joints_vis']

        return records
+
+
+from typing import Optional, Tuple, Union, List
+import numbers
+
+# The PIL import and the interp-code table below are required by the 'pillow'
+# branch of imresize(); 'imread_backend' stands in for the global that mmcv
+# resolves at runtime (assumed default: 'cv2'), so the ported code is
+# self-contained.
+from PIL import Image
+
+imread_backend = 'cv2'
+pillow_interp_codes = {
+    'nearest': Image.NEAREST,
+    'bilinear': Image.BILINEAR,
+    'bicubic': Image.BICUBIC,
+    'box': Image.BOX,
+    'lanczos': Image.LANCZOS,
+    'hamming': Image.HAMMING
+}
+
+
+def _scale_size(
+        size: Tuple[int, int],
+        scale: Union[float, int, tuple], ) -> Tuple[int, int]:
+    """Rescale a size by a ratio.
+
+    Args:
+        size (tuple[int]): (w, h).
+        scale (float | tuple(float)): Scaling factor.
+
+    Returns:
+        tuple[int]: scaled size.
+    """
+    if isinstance(scale, (float, int)):
+        scale = (scale, scale)
+    w, h = size
+    return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
+
+
+def rescale_size(old_size: tuple,
+                 scale: Union[float, int, list],
+                 return_scale: bool=False) -> tuple:
+    """Calculate the new size to be rescaled to.
+
+    Args:
+        old_size (tuple[int]): The old size (w, h) of image.
+        scale (float | list[int]): The scaling factor or maximum size.
+            If it is a float number, then the image will be rescaled by this
+            factor, else if it is a list of 2 integers, then the image will
+            be rescaled as large as possible within the scale.
+        return_scale (bool): Whether to return the scaling factor besides the
+            rescaled image size.
+
+    Returns:
+        tuple[int]: The new rescaled image size.
+    """
+    w, h = old_size
+    if isinstance(scale, (float, int)):
+        if scale <= 0:
+            raise ValueError(f'Invalid scale {scale}, must be positive.')
+        scale_factor = scale
+    elif isinstance(scale, list):
+        max_long_edge = max(scale)
+        max_short_edge = min(scale)
+        scale_factor = min(max_long_edge / max(h, w),
+                           max_short_edge / min(h, w))
+    else:
+        raise TypeError(
+            f'Scale must be a number or list of int, but got {type(scale)}')
+
+    new_size = _scale_size((w, h), scale_factor)
+
+    if return_scale:
+        return new_size, scale_factor
+    else:
+        return new_size
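
For the list form, the factor is min(max_long_edge / long_side,
max_short_edge / short_side), i.e. the image is scaled as large as possible
while both edges stay inside the bounds. A worked example (import path
assumed):

from ppdet.data.transform.keypoint_operators import rescale_size  # assumed path

# 1280x720 image, bounds [1333, 800]:
# factor = min(1333 / 1280, 800 / 720) = 1333 / 1280 ~ 1.0414
new_size, factor = rescale_size((1280, 720), [1333, 800], return_scale=True)
print(new_size, factor)  # (1333, 750) 1.04140625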
+
+
+def imrescale(img: np.ndarray,
+              scale: Union[float, List[int]],
+              return_scale: bool=False,
+              interpolation: str='bilinear',
+              backend: Optional[str]=None) -> Union[np.ndarray, Tuple[
+                  np.ndarray, float]]:
+    """Resize image while keeping the aspect ratio.
+
+    Args:
+        img (ndarray): The input image.
+        scale (float | list[int]): The scaling factor or maximum size.
+            If it is a float number, then the image will be rescaled by this
+            factor, else if it is a list of 2 integers, then the image will
+            be rescaled as large as possible within the scale.
+        return_scale (bool): Whether to return the scaling factor besides the
+            rescaled image.
+        interpolation (str): Same as :func:`resize`.
+        backend (str | None): Same as :func:`resize`.
+
+    Returns:
+        ndarray: The rescaled image.
+    """
+    h, w = img.shape[:2]
+    new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
+    rescaled_img = imresize(
+        img, new_size, interpolation=interpolation, backend=backend)
+    if return_scale:
+        return rescaled_img, scale_factor
+    else:
+        return rescaled_img
+
+
+def imresize(
+        img: np.ndarray,
+        size: Tuple[int, int],
+        return_scale: bool=False,
+        interpolation: str='bilinear',
+        out: Optional[np.ndarray]=None,
+        backend: Optional[str]=None,
+        interp=cv2.INTER_LINEAR, ) -> Union[Tuple[np.ndarray, float, float],
+                                            np.ndarray]:
+    """Resize image to a given size.
+
+    Args:
+        img (ndarray): The input image.
+        size (tuple[int]): Target size (w, h).
+        return_scale (bool): Whether to return `w_scale` and `h_scale`.
+        interpolation (str): Interpolation method, accepted values are
+            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+            backend, "nearest", "bilinear" for 'pillow' backend.
+        out (ndarray): The output destination.
+        backend (str | None): The image resize backend type. Options are `cv2`,
+            `pillow`, `None`. If backend is None, the module-level default
+            ``imread_backend`` ('cv2') defined above is used. Default: None.
+
+    Returns:
+        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
+        `resized_img`.
+    """
+    h, w = img.shape[:2]
+    if backend is None:
+        backend = imread_backend  # module-level default defined above
+    if backend not in ['cv2', 'pillow']:
+        raise ValueError(f'backend: {backend} is not supported for resize.'
+                         f"Supported backends are 'cv2', 'pillow'")
+
+    if backend == 'pillow':
+        assert img.dtype == np.uint8, 'Pillow backend only supports uint8 type'
+        pil_image = Image.fromarray(img)
+        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
+        resized_img = np.array(pil_image)
+    else:
+        # note: the cv2 branch uses the `interp` flag, not `interpolation`
+        resized_img = cv2.resize(img, size, dst=out, interpolation=interp)
+    if not return_scale:
+        return resized_img
+    else:
+        w_scale = size[0] / w
+        h_scale = size[1] / h
+        return resized_img, w_scale, h_scale
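
imrescale simply composes rescale_size with imresize, so the returned
scale_factor is the intended ratio rather than the integer-rounded one. A
short sketch (import path assumed):

import numpy as np
from ppdet.data.transform.keypoint_operators import imrescale  # assumed path

img = np.zeros((720, 1280, 3), dtype=np.uint8)
out, factor = imrescale(img, [1333, 800], return_scale=True, backend='cv2')
print(out.shape, factor)  # (750, 1333, 3) ~1.0414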
+
+
+class PETR_Resize:
+    """Resize images & bbox & mask.
+
+    This transform resizes the input image to some scale. Bboxes and masks are
+    then resized with the same scale factor. If the input dict contains the key
+    "scale", then the scale in the input dict is used, otherwise the specified
+    scale in the init method is used. If the input dict contains the key
+    "scale_factor" (if MultiScaleFlipAug does not give img_scale but
+    scale_factor), the actual scale will be computed by image shape and
+    scale_factor.
+
+    `img_scale` can either be a tuple (single-scale) or a list of tuple
+    (multi-scale). There are 3 multiscale modes:
+
+    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
+      range and multiply it with the image scale.
+    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
+      sample a scale from the multiscale range.
+    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
+      sample a scale from multiple scales.
+
+    Args:
+        img_scale (tuple or list[tuple]): Image scales for resizing.
+        multiscale_mode (str): Either "range" or "value".
+        ratio_range (tuple[float]): (min_ratio, max_ratio)
+        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+            image.
+        bbox_clip_border (bool, optional): Whether to clip the objects outside
+            the border of the image. In some datasets like MOT17, the gt bboxes
+            are allowed to cross the border of images. Therefore, we don't
+            need to clip the gt bboxes in these cases. Defaults to True.
+        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
+            These two backends generate slightly different results. Defaults
+            to 'cv2'.
+        interpolation (str): Interpolation method, accepted values are
+            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+            backend, "nearest", "bilinear" for 'pillow' backend.
+        override (bool, optional): Whether to override `scale` and
+            `scale_factor` so as to call resize twice. Default False. If True,
+            after the first resizing, the existing `scale` and `scale_factor`
+            will be ignored so the second resizing can be allowed.
+            This option is a work-around for multiple times of resize in DETR.
+            Defaults to False.
+        keypoint_clip_border (bool, optional): Whether to clip keypoints
+            outside the border of the image. Defaults to True.
+    """
+
+    def __init__(self,
+                 img_scale=None,
+                 multiscale_mode='range',
+                 ratio_range=None,
+                 keep_ratio=True,
+                 bbox_clip_border=True,
+                 backend='cv2',
+                 interpolation='bilinear',
+                 override=False,
+                 keypoint_clip_border=True):
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+            assert isinstance(self.img_scale, list)
+
+        if ratio_range is not None:
+            # mode 1: given a scale and a range of image ratio
+            assert len(self.img_scale) == 1
+        else:
+            # mode 2: given multiple scales or a range of scales
+            assert multiscale_mode in ['value', 'range']
+
+        self.backend = backend
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+        # TODO: refactor the override option in Resize
+        self.interpolation = interpolation
+        self.override = override
+        self.bbox_clip_border = bbox_clip_border
+        self.keypoint_clip_border = keypoint_clip_border
+
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from given candidates.
+
+        Args:
+            img_scales (list[tuple]): Image scales for selection.
+
+        Returns:
+            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
+                where ``img_scale`` is the selected image scale and \
+                ``scale_idx`` is the selected index in the given candidates.
+        """
+
+        assert isinstance(img_scales, list)
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+
+    @staticmethod
+    def random_sample(img_scales):
+        """Randomly sample an img_scale when ``multiscale_mode=='range'``.
+
+        Args:
+            img_scales (list[tuple]): Image scale range for sampling.
+                There must be two tuples in img_scales, which specify the lower
+                and upper bound of image scales.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
+                ``img_scale`` is the sampled scale and None is just a placeholder \
+                to be consistent with :func:`random_select`.
+        """
+
+        assert isinstance(img_scales, list) and len(img_scales) == 2
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(
+            min(img_scale_long), max(img_scale_long) + 1)
+        short_edge = np.random.randint(
+            min(img_scale_short), max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """Randomly sample an img_scale when ``ratio_range`` is specified.
+
+        A ratio will be randomly sampled from the range specified by
+        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
+        generate the sampled scale.
+
+        Args:
+            img_scale (list): Image scale base to multiply with ratio.
+            ratio_range (tuple[float]): The minimum and maximum ratio to scale
+                the ``img_scale``.
+
+        Returns:
+            (tuple, None): Returns a tuple ``(scale, None)``, where \
+                ``scale`` is the sampled ratio multiplied with ``img_scale`` and \
+                None is just a placeholder to be consistent with \
+                :func:`random_select`.
+        """
+
+        assert isinstance(img_scale, list) and len(img_scale) == 2
+        min_ratio, max_ratio = ratio_range
+        assert min_ratio <= max_ratio
+        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+        return scale, None
+
+    def _random_scale(self, results):
+        """Randomly sample an img_scale according to ``ratio_range`` and
+        ``multiscale_mode``.
+
+        If ``ratio_range`` is specified, a ratio will be sampled and be
+        multiplied with ``img_scale``.
+        If multiple scales are specified by ``img_scale``, a scale will be
+        sampled according to ``multiscale_mode``.
+        Otherwise, a single scale will be used.
+
+        Args:
+            results (dict): Result dict from :obj:`dataset`.
+
+        Returns:
+            dict: Two new keys ``'scale'`` and ``'scale_idx'`` are added into \
+                ``results``, which would be used by subsequent pipelines.
+        """
+
+        if self.ratio_range is not None:
+            scale, scale_idx = self.random_sample_ratio(self.img_scale[0],
+                                                        self.ratio_range)
+        elif len(self.img_scale) == 1:
+            scale, scale_idx = self.img_scale[0], 0
+        elif self.multiscale_mode == 'range':
+            scale, scale_idx = self.random_sample(self.img_scale)
+        elif self.multiscale_mode == 'value':
+            scale, scale_idx = self.random_select(self.img_scale)
+        else:
+            raise NotImplementedError
+        results['scale'] = scale
+        results['scale_idx'] = scale_idx
+
+    def _resize_img(self, results):
+        """Resize images with ``results['scale']``."""
+        for key in ['image'] if 'image' in results else []:
+            if self.keep_ratio:
+                img, scale_factor = imrescale(
+                    results[key],
+                    results['scale'],
+                    return_scale=True,
+                    interpolation=self.interpolation,
+                    backend=self.backend)
+                # w_scale and h_scale have a minor difference;
+                # a real fix should be done in imrescale in the future
+                new_h, new_w = img.shape[:2]
+                h, w = results[key].shape[:2]
+                w_scale = new_w / w
+                h_scale = new_h / h
+            else:
+                img, w_scale, h_scale = imresize(
+                    results[key],
+                    results['scale'],
+                    return_scale=True,
+                    interpolation=self.interpolation,
+                    backend=self.backend)
+
+            scale_factor = np.array(
+                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
+            results['im_shape'] = np.array(img.shape)
+            # in case that there is no padding
+            results['pad_shape'] = img.shape
+            results['scale_factor'] = scale_factor
+            results['keep_ratio'] = self.keep_ratio
+            # img_pad = self.impad(img, shape=results['scale'])
+            results[key] = img
+
+    def _resize_bboxes(self, results):
+        """Resize bounding boxes with ``results['scale_factor']``."""
+        for key in ['gt_bbox'] if 'gt_bbox' in results else []:
+            bboxes = results[key] * results['scale_factor']
+            if self.bbox_clip_border:
+                img_shape = results['im_shape']
+                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
+                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
+            results[key] = bboxes
+
+    def _resize_masks(self, results):
+        """Resize masks with ``results['scale']``."""
+        for key in ['mask'] if 'mask' in results else []:
+            if results[key] is None:
+                continue
+            if self.keep_ratio:
+                results[key] = results[key].rescale(results['scale'])
+            else:
+                results[key] = results[key].resize(results['im_shape'][:2])
+
+    def _resize_seg(self, results):
+        """Resize semantic segmentation map with ``results['scale']``."""
+        for key in ['seg'] if 'seg' in results else []:
+            if self.keep_ratio:
+                gt_seg = imrescale(
+                    results[key],
+                    results['scale'],
+                    interpolation='nearest',
+                    backend=self.backend)
+            else:
+                gt_seg = imresize(
+                    results[key],
+                    results['scale'],
+                    interpolation='nearest',
+                    backend=self.backend)
+            results[key] = gt_seg
+
+    def _resize_keypoints(self, results):
+        """Resize keypoints with ``results['scale_factor']``."""
+        for key in ['gt_joints'] if 'gt_joints' in results else []:
+            keypoints = results[key].copy()
+            keypoints[..., 0] = keypoints[..., 0] * results['scale_factor'][0]
+            keypoints[..., 1] = keypoints[..., 1] * results['scale_factor'][1]
+            if self.keypoint_clip_border:
+                img_shape = results['im_shape']
+                keypoints[..., 0] = np.clip(keypoints[..., 0], 0, img_shape[1])
+                keypoints[..., 1] = np.clip(keypoints[..., 1], 0, img_shape[0])
+            results[key] = keypoints
+
+    def _resize_areas(self, results):
+        """Resize mask areas with ``results['scale_factor']``."""
+        for key in ['gt_areas'] if 'gt_areas' in results else []:
+            areas = results[key].copy()
+            areas = areas * results['scale_factor'][0] * results[
+                'scale_factor'][1]
+            results[key] = areas
+
+    def __call__(self, results):
+        """Call function to resize images, bounding boxes, masks, and the
+        semantic segmentation map.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Resized results; 'im_shape', 'pad_shape', 'scale_factor', \
+                'keep_ratio' keys are added into the result dict.
+        """
+        if 'scale' not in results:
+            if 'scale_factor' in results:
+                img_shape = results['image'].shape[:2]
+                scale_factor = results['scale_factor'][0]
+                # assert isinstance(scale_factor, float)
+                results['scale'] = [int(x * scale_factor)
+                                    for x in img_shape][::-1]
+            else:
+                self._random_scale(results)
+        else:
+            if not self.override:
+                assert 'scale_factor' not in results, (
+                    'scale and scale_factor cannot be both set.')
+            else:
+                results.pop('scale')
+                if 'scale_factor' in results:
+                    results.pop('scale_factor')
+                self._random_scale(results)
+
+        self._resize_img(results)
+        self._resize_bboxes(results)
+        self._resize_masks(results)
+        self._resize_seg(results)
+        self._resize_keypoints(results)
+        self._resize_areas(results)
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += f'(img_scale={self.img_scale}, '
+        repr_str += f'multiscale_mode={self.multiscale_mode}, '
+        repr_str += f'ratio_range={self.ratio_range}, '
+        repr_str += f'keep_ratio={self.keep_ratio}, '
+        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
+        repr_str += f'keypoint_clip_border={self.keypoint_clip_border})'
+        return repr_str
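
Finally, a hedged end-to-end sketch of PETR_Resize in multiscale 'value'
mode. Note that this port's rescale_size only accepts list scales, so the
scales below are written as lists; key names follow this file's conventions
and the import path is an assumption:

import numpy as np
from ppdet.data.transform.keypoint_operators import PETR_Resize  # assumed path

resize = PETR_Resize(
    img_scale=[[1333, 800], [1333, 640]],
    multiscale_mode='value',
    keep_ratio=True)
results = {
    'image': np.zeros((480, 640, 3), dtype=np.uint8),
    'gt_bbox': np.array([[10., 20., 200., 300.]], dtype=np.float32),
    'gt_joints': np.zeros((1, 17, 3), dtype=np.float32),
}
results = resize(results)
print(results['im_shape'], results['scale_factor'])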