# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import copy
import os
import os.path as osp

import numpy as np
import paddle
from paddle.static import InputSpec

import paddlers
import paddlers.models.ppdet as ppdet
from paddlers.models.ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner
from paddlers.transforms import decode_image
from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Pad
from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \
    _BatchPad, _Gt2YoloTarget
from paddlers.models.ppdet.optimizer import ModelEMA
import paddlers.utils.logging as logging
from paddlers.utils.checkpoint import det_pretrain_weights_dict
from .base import BaseModel
from .utils.det_metrics import VOCMetric, COCOMetric

__all__ = [
    "YOLOv3", "FasterRCNN", "PPYOLO", "PPYOLOTiny", "PPYOLOv2", "MaskRCNN",
    "PicoDet"
]


class BaseDetector(BaseModel):
    def __init__(self, model_name, num_classes=80, **params):
        self.init_params.update(locals())
        if 'with_net' in self.init_params:
            del self.init_params['with_net']
        super(BaseDetector, self).__init__('detector')
        if not hasattr(ppdet.modeling, model_name):
            raise ValueError("ERROR: There is no model named {}.".format(
                model_name))
        self.model_name = model_name
        self.num_classes = num_classes
        self.labels = None
        if params.get('with_net', True):
            params.pop('with_net', None)
            self.net = self.build_net(**params)

    def build_net(self, **params):
        with paddle.utils.unique_name.guard():
            net = ppdet.modeling.__dict__[self.model_name](**params)
        return net

    def _build_inference_net(self):
        infer_net = self.net
        infer_net.eval()
        return infer_net

    def _fix_transforms_shape(self, image_shape):
        raise NotImplementedError("_fix_transforms_shape: not implemented!")

    def _define_input_spec(self, image_shape):
        input_spec = [{
            "image": InputSpec(
                shape=image_shape, name='image', dtype='float32'),
            "im_shape": InputSpec(
                shape=[image_shape[0], 2], name='im_shape', dtype='float32'),
            "scale_factor": InputSpec(
                shape=[image_shape[0], 2], name='scale_factor', dtype='float32')
        }]
        return input_spec

    def _check_image_shape(self, image_shape):
        if len(image_shape) == 2:
            image_shape = [1, 3] + image_shape
        if image_shape[-2] % 32 > 0 or image_shape[-1] % 32 > 0:
            raise ValueError(
                "Height and width in fixed_input_shape must be a multiple of 32, but received {}.".
                format(image_shape[-2:]))
        return image_shape

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, -1, -1]
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)

    def _get_backbone(self, backbone_name, **params):
        backbone = getattr(ppdet.modeling, backbone_name)(**params)
        return backbone

    def run(self, net, inputs, mode):
        net_out = net(inputs)
        if mode in ['train', 'eval']:
            outputs = net_out
        else:
            outputs = dict()
            for key in net_out:
                outputs[key] = net_out[key].numpy()
        return outputs

    def default_optimizer(self,
                          parameters,
                          learning_rate,
                          warmup_steps,
                          warmup_start_lr,
                          lr_decay_epochs,
                          lr_decay_gamma,
                          num_steps_each_epoch,
                          reg_coeff=1e-04,
                          scheduler='Piecewise',
                          num_epochs=None):
        if scheduler.lower() == 'piecewise':
            if warmup_steps > 0 and warmup_steps > lr_decay_epochs[
                    0] * num_steps_each_epoch:
                logging.error(
                    "In function train(), parameters must satisfy: "
                    "warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch. "
                    "See this doc for more information: "
                    "https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/parameters.md",
                    exit=False)
                logging.error(
                    "Either `warmup_steps` should be less than {} or `lr_decay_epochs[0]` "
                    "should be greater than {}. Please modify `warmup_steps` or "
                    "`lr_decay_epochs` in the train() function.".format(
                        lr_decay_epochs[0] * num_steps_each_epoch,
                        warmup_steps // num_steps_each_epoch),
                    exit=True)
            boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
            values = [(lr_decay_gamma**i) * learning_rate
                      for i in range(len(lr_decay_epochs) + 1)]
            scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries, values)
        elif scheduler.lower() == 'cosine':
            if num_epochs is None:
                logging.error(
                    "`num_epochs` must be set while using the cosine annealing decay "
                    "scheduler, but received {}.".format(num_epochs),
                    exit=False)
            if warmup_steps > 0 and warmup_steps > num_epochs * num_steps_each_epoch:
                logging.error(
                    "In function train(), parameters must satisfy: "
                    "warmup_steps <= num_epochs * num_steps_each_epoch. "
                    "See this doc for more information: "
                    "https://github.com/PaddlePaddle/PaddleRS/blob/develop/docs/parameters.md",
                    exit=False)
                logging.error(
                    "`warmup_steps` must be less than the total number of steps ({}). "
                    "Please modify `num_epochs` or `warmup_steps` in the train() function.".
                    format(num_epochs * num_steps_each_epoch),
                    exit=True)
            T_max = num_epochs * num_steps_each_epoch - warmup_steps
            scheduler = paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=learning_rate,
                T_max=T_max,
                eta_min=0.0,
                last_epoch=-1)
        else:
            logging.error(
                "Invalid learning rate scheduler: {}!".format(scheduler),
                exit=True)

        if warmup_steps > 0:
            scheduler = paddle.optimizer.lr.LinearWarmup(
                learning_rate=scheduler,
                warmup_steps=warmup_steps,
                start_lr=warmup_start_lr,
                end_lr=learning_rate)
        optimizer = paddle.optimizer.Momentum(
            scheduler,
            momentum=.9,
            weight_decay=paddle.regularizer.L2Decay(coeff=reg_coeff),
            parameters=parameters)
        return optimizer
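
    # Illustrative sketch (not part of the original file): how the pieces of
    # `default_optimizer` compose for the 'Piecewise' scheduler. Assuming, for
    # example, learning_rate=0.001, lr_decay_gamma=0.1,
    # lr_decay_epochs=(216, 243), and num_steps_each_epoch=100, the computed
    # schedule would be:
    #     boundaries = [21600, 24300]
    #     values     = [0.001, 0.0001, 0.00001]
    # A positive `warmup_steps` then wraps the schedule in LinearWarmup, which
    # ramps the LR from `warmup_start_lr` up to `learning_rate` before the
    # piecewise (or cosine) decay takes over.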
    def train(self,
              num_epochs,
              train_dataset,
              train_batch_size=64,
              eval_dataset=None,
              optimizer=None,
              save_interval_epochs=1,
              log_interval_steps=10,
              save_dir='output',
              pretrain_weights='IMAGENET',
              learning_rate=.001,
              warmup_steps=0,
              warmup_start_lr=0.0,
              lr_decay_epochs=(216, 243),
              lr_decay_gamma=0.1,
              metric=None,
              use_ema=False,
              early_stop=False,
              early_stop_patience=5,
              use_vdl=True,
              resume_checkpoint=None):
        """
        Train the model.

        Args:
            num_epochs (int): Number of epochs.
            train_dataset (paddlers.datasets.COCODetDataset|paddlers.datasets.VOCDetDataset):
                Training dataset.
            train_batch_size (int, optional): Total batch size among all cards used in
                training. Defaults to 64.
            eval_dataset (paddlers.datasets.COCODetDataset|paddlers.datasets.VOCDetDataset|None, optional):
                Evaluation dataset. If None, the model will not be evaluated during the
                training process. Defaults to None.
            optimizer (paddle.optimizer.Optimizer|None, optional): Optimizer used for
                training. If None, a default optimizer will be used. Defaults to None.
            save_interval_epochs (int, optional): Epoch interval for saving the model.
                Defaults to 1.
            log_interval_steps (int, optional): Step interval for printing training
                information. Defaults to 10.
            save_dir (str, optional): Directory to save the model. Defaults to 'output'.
            pretrain_weights (str|None, optional): None or name/path of pretrained
                weights. If None, no pretrained weights will be loaded.
                Defaults to 'IMAGENET'.
            learning_rate (float, optional): Learning rate for training.
                Defaults to 0.001.
            warmup_steps (int, optional): Number of steps of warm-up training.
                Defaults to 0.
            warmup_start_lr (float, optional): Start learning rate of warm-up training.
                Defaults to 0.0.
            lr_decay_epochs (list|tuple, optional): Epoch milestones for learning
                rate decay. Defaults to (216, 243).
            lr_decay_gamma (float, optional): Gamma coefficient of learning rate decay.
                Defaults to 0.1.
            metric (str|None, optional): Evaluation metric. Choices are {'VOC', 'COCO', None}.
                If None, the metric is determined according to the dataset format.
                Defaults to None.
            use_ema (bool, optional): Whether to use exponential moving average
                strategy. Defaults to False.
            early_stop (bool, optional): Whether to adopt early stop strategy.
                Defaults to False.
            early_stop_patience (int, optional): Early stop patience. Defaults to 5.
            use_vdl (bool, optional): Whether to use VisualDL to monitor the training
                process. Defaults to True.
            resume_checkpoint (str|None, optional): Path of the checkpoint to resume
                training from. If None, no training checkpoint will be resumed. At most
                one of `resume_checkpoint` and `pretrain_weights` can be set simultaneously.
                Defaults to None.
        """

        args = self._pre_train(locals())
        args.pop('self')
        return self._real_train(**args)
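
    # Minimal usage sketch (illustrative only; dataset paths and transforms
    # are hypothetical, and constructor arguments may differ by version):
    #
    #     import paddlers as pdrs
    #     train_ds = pdrs.datasets.VOCDetDataset(
    #         data_dir='data', file_list='train_list.txt',
    #         label_list='labels.txt', transforms=train_transforms)
    #     model = pdrs.tasks.det.YOLOv3(num_classes=len(train_ds.labels))
    #     model.train(num_epochs=270, train_dataset=train_ds,
    #                 train_batch_size=64, save_dir='output/yolov3')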
    def _pre_train(self, in_args):
        return in_args

    def _real_train(
            self, num_epochs, train_dataset, train_batch_size, eval_dataset,
            optimizer, save_interval_epochs, log_interval_steps, save_dir,
            pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
            lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
            early_stop_patience, use_vdl, resume_checkpoint):
        if self.status == 'Infer':
            logging.error(
                "Exported inference model does not support training.",
                exit=True)
        if pretrain_weights is not None and resume_checkpoint is not None:
            logging.error(
                "`pretrain_weights` and `resume_checkpoint` cannot be set simultaneously.",
                exit=True)

        if train_dataset.__class__.__name__ == 'VOCDetDataset':
            train_dataset.data_fields = {
                'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                'difficult'
            }
        elif train_dataset.__class__.__name__ == 'COCODetDataset':
            if self.__class__.__name__ == 'MaskRCNN':
                train_dataset.data_fields = {
                    'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                    'gt_poly', 'is_crowd'
                }
            else:
                train_dataset.data_fields = {
                    'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                    'is_crowd'
                }

        if metric is None:
            if eval_dataset.__class__.__name__ == 'VOCDetDataset':
                self.metric = 'voc'
            elif eval_dataset.__class__.__name__ == 'COCODetDataset':
                self.metric = 'coco'
        else:
            assert metric.lower() in ['coco', 'voc'], \
                "Evaluation metric {} is not supported. Please choose from 'COCO' and 'VOC'.".format(metric)
            self.metric = metric.lower()

        self.labels = train_dataset.labels
        self.num_max_boxes = train_dataset.num_max_boxes
        train_dataset.batch_transforms = self._compose_batch_transform(
            train_dataset.transforms, mode='train')

        # Build optimizer if not defined
        if optimizer is None:
            num_steps_each_epoch = len(train_dataset) // train_batch_size
            self.optimizer = self.default_optimizer(
                parameters=self.net.parameters(),
                learning_rate=learning_rate,
                warmup_steps=warmup_steps,
                warmup_start_lr=warmup_start_lr,
                lr_decay_epochs=lr_decay_epochs,
                lr_decay_gamma=lr_decay_gamma,
                num_steps_each_epoch=num_steps_each_epoch)
        else:
            self.optimizer = optimizer

        # Initiate weights
        if pretrain_weights is not None:
            if not osp.exists(pretrain_weights):
                key = '_'.join([self.model_name, self.backbone_name])
                if key not in det_pretrain_weights_dict:
                    logging.warning(
                        "Path of pretrained weights ('{}') does not exist!".
                        format(pretrain_weights))
                    pretrain_weights = None
                elif pretrain_weights not in det_pretrain_weights_dict[key]:
                    logging.warning(
                        "Path of pretrained weights ('{}') does not exist!".
                        format(pretrain_weights))
                    pretrain_weights = det_pretrain_weights_dict[key][0]
                    logging.warning(
                        "`pretrain_weights` is forcibly set to '{}'. "
                        "If you don't want to use pretrained weights, "
                        "please set `pretrain_weights` to None.".format(
                            pretrain_weights))
            else:
                if osp.splitext(pretrain_weights)[-1] != '.pdparams':
                    logging.error(
                        "Invalid pretrained weights. Please specify a .pdparams file.",
                        exit=True)
        pretrained_dir = osp.join(save_dir, 'pretrain')
        self.initialize_net(
            pretrain_weights=pretrain_weights,
            save_dir=pretrained_dir,
            resume_checkpoint=resume_checkpoint,
            is_backbone_weights=(pretrain_weights == 'IMAGENET' and
                                 'ESNet_' in self.backbone_name))

        if use_ema:
            ema = ModelEMA(model=self.net, decay=.9998, use_thres_step=True)
        else:
            ema = None
        # Start train loop
        self.train_loop(
            num_epochs=num_epochs,
            train_dataset=train_dataset,
            train_batch_size=train_batch_size,
            eval_dataset=eval_dataset,
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
            ema=ema,
            early_stop=early_stop,
            early_stop_patience=early_stop_patience,
            use_vdl=use_vdl)
    def quant_aware_train(self,
                          num_epochs,
                          train_dataset,
                          train_batch_size=64,
                          eval_dataset=None,
                          optimizer=None,
                          save_interval_epochs=1,
                          log_interval_steps=10,
                          save_dir='output',
                          learning_rate=.00001,
                          warmup_steps=0,
                          warmup_start_lr=0.0,
                          lr_decay_epochs=(216, 243),
                          lr_decay_gamma=0.1,
                          metric=None,
                          use_ema=False,
                          early_stop=False,
                          early_stop_patience=5,
                          use_vdl=True,
                          resume_checkpoint=None,
                          quant_config=None):
        """
        Quantization-aware training.

        Args:
            num_epochs (int): Number of epochs.
            train_dataset (paddlers.datasets.COCODetDataset|paddlers.datasets.VOCDetDataset):
                Training dataset.
            train_batch_size (int, optional): Total batch size among all cards used in
                training. Defaults to 64.
            eval_dataset (paddlers.datasets.COCODetDataset|paddlers.datasets.VOCDetDataset|None, optional):
                Evaluation dataset. If None, the model will not be evaluated during the
                training process. Defaults to None.
            optimizer (paddle.optimizer.Optimizer|None, optional): Optimizer used for
                training. If None, a default optimizer will be used. Defaults to None.
            save_interval_epochs (int, optional): Epoch interval for saving the model.
                Defaults to 1.
            log_interval_steps (int, optional): Step interval for printing training
                information. Defaults to 10.
            save_dir (str, optional): Directory to save the model. Defaults to 'output'.
            learning_rate (float, optional): Learning rate for training.
                Defaults to 0.00001.
            warmup_steps (int, optional): Number of steps of warm-up training.
                Defaults to 0.
            warmup_start_lr (float, optional): Start learning rate of warm-up training.
                Defaults to 0.0.
            lr_decay_epochs (list|tuple, optional): Epoch milestones for learning rate
                decay. Defaults to (216, 243).
            lr_decay_gamma (float, optional): Gamma coefficient of learning rate decay.
                Defaults to 0.1.
            metric (str|None, optional): Evaluation metric. Choices are {'VOC', 'COCO', None}.
                If None, the metric is determined according to the dataset format.
                Defaults to None.
            use_ema (bool, optional): Whether to use exponential moving average strategy.
                Defaults to False.
            early_stop (bool, optional): Whether to adopt early stop strategy.
                Defaults to False.
            early_stop_patience (int, optional): Early stop patience. Defaults to 5.
            use_vdl (bool, optional): Whether to use VisualDL to monitor the training
                process. Defaults to True.
            quant_config (dict|None, optional): Quantization configuration. If None,
                a default rule-of-thumb configuration will be used. Defaults to None.
            resume_checkpoint (str|None, optional): Path of the checkpoint to resume
                quantization-aware training from. If None, no training checkpoint will
                be resumed. Defaults to None.
        """

        self._prepare_qat(quant_config)
        self.train(
            num_epochs=num_epochs,
            train_dataset=train_dataset,
            train_batch_size=train_batch_size,
            eval_dataset=eval_dataset,
            optimizer=optimizer,
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
            pretrain_weights=None,
            learning_rate=learning_rate,
            warmup_steps=warmup_steps,
            warmup_start_lr=warmup_start_lr,
            lr_decay_epochs=lr_decay_epochs,
            lr_decay_gamma=lr_decay_gamma,
            metric=metric,
            use_ema=use_ema,
            early_stop=early_stop,
            early_stop_patience=early_stop_patience,
            use_vdl=use_vdl,
            resume_checkpoint=resume_checkpoint)
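
    # Illustrative QAT flow (not part of the original file; `train_ds` is a
    # hypothetical dataset object): train or load a full-precision model
    # first, then fine-tune it with fake quantization at a small LR:
    #
    #     model.train(num_epochs=270, train_dataset=train_ds)   # FP32 training
    #     model.quant_aware_train(num_epochs=5, train_dataset=train_ds,
    #                             learning_rate=1e-5)           # QAT fine-tune
    #
    # Note that `pretrain_weights` is fixed to None above: QAT always starts
    # from the weights already loaded in `self.net`.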
    def evaluate(self,
                 eval_dataset,
                 batch_size=1,
                 metric=None,
                 return_details=False):
        """
        Evaluate the model.

        Args:
            eval_dataset (paddlers.datasets.COCODetDataset|paddlers.datasets.VOCDetDataset):
                Evaluation dataset.
            batch_size (int, optional): Total batch size among all cards used for
                evaluation. Defaults to 1.
            metric (str|None, optional): Evaluation metric. Choices are {'VOC', 'COCO', None}.
                If None, the metric is determined according to the dataset format.
                Defaults to None.
            return_details (bool, optional): Whether to return evaluation details.
                Defaults to False.

        Returns:
            If `return_details` is False, return collections.OrderedDict with key-value pairs:
                {"bbox_mmap": mean average precision (0.50, 11point)}.
        """

        if metric is None:
            if not hasattr(self, 'metric'):
                if eval_dataset.__class__.__name__ == 'VOCDetDataset':
                    self.metric = 'voc'
                elif eval_dataset.__class__.__name__ == 'COCODetDataset':
                    self.metric = 'coco'
        else:
            assert metric.lower() in ['coco', 'voc'], \
                "Evaluation metric {} is not supported. Please choose from 'COCO' and 'VOC'.".format(metric)
            self.metric = metric.lower()

        if self.metric == 'voc':
            eval_dataset.data_fields = {
                'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                'difficult'
            }
        elif self.metric == 'coco':
            if self.__class__.__name__ == 'MaskRCNN':
                eval_dataset.data_fields = {
                    'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                    'gt_poly', 'is_crowd'
                }
            else:
                eval_dataset.data_fields = {
                    'im_id', 'image_shape', 'image', 'gt_bbox', 'gt_class',
                    'is_crowd'
                }
        eval_dataset.batch_transforms = self._compose_batch_transform(
            eval_dataset.transforms, mode='eval')
        self._check_transforms(eval_dataset.transforms, 'eval')

        self.net.eval()
        nranks = paddle.distributed.get_world_size()
        local_rank = paddle.distributed.get_rank()
        if nranks > 1:
            # Initialize parallel environment if not done.
            if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
            ):
                paddle.distributed.init_parallel_env()

        if batch_size > 1:
            logging.warning(
                "Detector only supports single card evaluation with batch_size=1 "
                "during evaluation, so batch_size is forcibly set to 1.")
            batch_size = 1

        if nranks < 2 or local_rank == 0:
            self.eval_data_loader = self.build_data_loader(
                eval_dataset, batch_size=batch_size, mode='eval')
            is_bbox_normalized = False
            if eval_dataset.batch_transforms is not None:
                is_bbox_normalized = any(
                    isinstance(t, _NormalizeBox)
                    for t in eval_dataset.batch_transforms.batch_transforms)
            if self.metric == 'voc':
                eval_metric = VOCMetric(
                    labels=eval_dataset.labels,
                    coco_gt=copy.deepcopy(eval_dataset.coco_gt),
                    is_bbox_normalized=is_bbox_normalized,
                    classwise=False)
            else:
                eval_metric = COCOMetric(
                    coco_gt=copy.deepcopy(eval_dataset.coco_gt),
                    classwise=False)
            scores = collections.OrderedDict()
            logging.info(
                "Start to evaluate (total_samples={}, total_steps={})...".format(
                    eval_dataset.num_samples, eval_dataset.num_samples))
            with paddle.no_grad():
                for step, data in enumerate(self.eval_data_loader):
                    outputs = self.run(self.net, data, 'eval')
                    eval_metric.update(data, outputs)
                eval_metric.accumulate()
                self.eval_details = eval_metric.details
                scores.update(eval_metric.get())
                eval_metric.reset()

            if return_details:
                return scores, self.eval_details
            return scores
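
    # Example (illustrative, not from the original file): with
    # `return_details=False` the result is an OrderedDict such as
    #     OrderedDict([('bbox_mmap', 0.72)])    # value is hypothetical
    # while `return_details=True` additionally returns the raw per-image
    # records collected by the metric object (`self.eval_details`).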
    def predict(self, img_file, transforms=None):
        """
        Do inference.

        Args:
            img_file (list[np.ndarray|str] | str | np.ndarray): Image path or decoded
                image data, or a list of image paths and/or decoded image data, in
                which case all images are predicted as one mini-batch.
            transforms (paddlers.transforms.Compose|None, optional): Transforms for
                inputs. If None, the transforms for the evaluation process will be
                used. Defaults to None.

        Returns:
            If `img_file` is a string or np.ndarray, the result is a list of dicts with
                the following key-value pairs:
                category_id (int): Predicted category ID. 0 represents the first
                    category in the dataset, and so on.
                category (str): Category name.
                bbox (list): Bounding box in [x, y, w, h] format.
                score (float): Confidence.
                mask (dict): Only for instance segmentation task. Mask of the object in
                    RLE format.
            If `img_file` is a list, the result is a list composed of lists of dicts
                with the above keys.
        """

        if transforms is None and not hasattr(self, 'test_transforms'):
            raise ValueError("transforms need to be defined, now is None.")
        if transforms is None:
            transforms = self.test_transforms
        if isinstance(img_file, (str, np.ndarray)):
            images = [img_file]
        else:
            images = img_file
        batch_samples = self.preprocess(images, transforms)
        self.net.eval()
        outputs = self.run(self.net, batch_samples, 'test')
        prediction = self.postprocess(outputs)

        if isinstance(img_file, (str, np.ndarray)):
            prediction = prediction[0]
        return prediction
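
    # Example (illustrative): predicting a single image returns a list of
    # dicts, one per detected object, e.g.
    #     [{'category_id': 0, 'category': 'ship',
    #       'bbox': [x, y, w, h], 'score': 0.98}, ...]
    # (category names and values are hypothetical). Passing a list of images
    # returns one such list per image.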
    def preprocess(self, images, transforms, to_tensor=True):
        self._check_transforms(transforms, 'test')
        batch_samples = list()
        for im in images:
            if isinstance(im, str):
                im = decode_image(im, read_raw=True)
            sample = {'image': im}
            sample = transforms(sample)
            batch_samples.append(sample)
        batch_transforms = self._compose_batch_transform(transforms, 'test')
        batch_samples = batch_transforms(batch_samples)
        if to_tensor:
            for k in batch_samples:
                batch_samples[k] = paddle.to_tensor(batch_samples[k])
        return batch_samples

    def postprocess(self, batch_pred):
        infer_result = {}
        if 'bbox' in batch_pred:
            bboxes = batch_pred['bbox']
            bbox_nums = batch_pred['bbox_num']
            det_res = []
            k = 0
            for i in range(len(bbox_nums)):
                det_nums = bbox_nums[i]
                for j in range(det_nums):
                    dt = bboxes[k]
                    k = k + 1
                    num_id, score, xmin, ymin, xmax, ymax = dt.tolist()
                    if int(num_id) < 0:
                        continue
                    category = self.labels[int(num_id)]
                    w = xmax - xmin
                    h = ymax - ymin
                    bbox = [xmin, ymin, w, h]
                    dt_res = {
                        'category_id': int(num_id),
                        'category': category,
                        'bbox': bbox,
                        'score': score
                    }
                    det_res.append(dt_res)
            infer_result['bbox'] = det_res

        if 'mask' in batch_pred:
            masks = batch_pred['mask']
            bboxes = batch_pred['bbox']
            mask_nums = batch_pred['bbox_num']
            seg_res = []
            k = 0
            for i in range(len(mask_nums)):
                det_nums = mask_nums[i]
                for j in range(det_nums):
                    mask = masks[k].astype(np.uint8)
                    score = float(bboxes[k][1])
                    label = int(bboxes[k][0])
                    k = k + 1
                    if label == -1:
                        continue
                    category = self.labels[int(label)]
                    sg_res = {
                        'category_id': int(label),
                        'category': category,
                        'mask': mask.astype('uint8'),
                        'score': score
                    }
                    seg_res.append(sg_res)
            infer_result['mask'] = seg_res

        bbox_num = batch_pred['bbox_num']
        results = []
        start = 0
        for num in bbox_num:
            end = start + num
            curr_res = infer_result['bbox'][start:end]
            if 'mask' in infer_result:
                mask_res = infer_result['mask'][start:end]
                for box, mask in zip(curr_res, mask_res):
                    box.update(mask)
            results.append(curr_res)
            start = end
        return results

    def _check_transforms(self, transforms, mode):
        super()._check_transforms(transforms, mode)
        if not isinstance(transforms.arrange,
                          paddlers.transforms.ArrangeDetector):
            raise TypeError(
                "`transforms.arrange` must be an ArrangeDetector object.")

class PicoDet(BaseDetector):
    def __init__(self,
                 num_classes=80,
                 backbone='ESNet_m',
                 nms_score_threshold=.025,
                 nms_topk=1000,
                 nms_keep_topk=100,
                 nms_iou_threshold=.6,
                 **params):
        self.init_params = locals()
        if backbone not in {
                'ESNet_s', 'ESNet_m', 'ESNet_l', 'LCNet', 'MobileNetV3',
                'ResNet18_vd'
        }:
            raise ValueError(
                "backbone: {} is not supported. Please choose one of "
                "{{'ESNet_s', 'ESNet_m', 'ESNet_l', 'LCNet', 'MobileNetV3', 'ResNet18_vd'}}.".
                format(backbone))
        self.backbone_name = backbone
        if params.get('with_net', True):
            if backbone == 'ESNet_s':
                backbone = self._get_backbone(
                    'ESNet',
                    scale=.75,
                    feature_maps=[4, 11, 14],
                    act="hard_swish",
                    channel_ratio=[
                        0.875, 0.5, 0.5, 0.5, 0.625, 0.5, 0.625, 0.5, 0.5,
                        0.5, 0.5, 0.5, 0.5
                    ])
                neck_out_channels = 96
                head_num_convs = 2
            elif backbone == 'ESNet_m':
                backbone = self._get_backbone(
                    'ESNet',
                    scale=1.0,
                    feature_maps=[4, 11, 14],
                    act="hard_swish",
                    channel_ratio=[
                        0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5,
                        0.625, 1.0, 0.625, 0.75
                    ])
                neck_out_channels = 128
                head_num_convs = 4
            elif backbone == 'ESNet_l':
                backbone = self._get_backbone(
                    'ESNet',
                    scale=1.25,
                    feature_maps=[4, 11, 14],
                    act="hard_swish",
                    channel_ratio=[
                        0.875, 0.5, 1.0, 0.625, 0.5, 0.75, 0.625, 0.625, 0.5,
                        0.625, 1.0, 0.625, 0.75
                    ])
                neck_out_channels = 160
                head_num_convs = 4
            elif backbone == 'LCNet':
                backbone = self._get_backbone(
                    'LCNet', scale=1.5, feature_maps=[3, 4, 5])
                neck_out_channels = 128
                head_num_convs = 4
            elif backbone == 'MobileNetV3':
                backbone = self._get_backbone(
                    'MobileNetV3',
                    scale=1.0,
                    with_extra_blocks=False,
                    extra_block_filters=[],
                    feature_maps=[7, 13, 16])
                neck_out_channels = 128
                head_num_convs = 4
            else:
                backbone = self._get_backbone(
                    'ResNet',
                    depth=18,
                    variant='d',
                    return_idx=[1, 2, 3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)
                neck_out_channels = 128
                head_num_convs = 4

            neck = ppdet.modeling.CSPPAN(
                in_channels=[i.channels for i in backbone.out_shape],
                out_channels=neck_out_channels,
                num_features=4,
                num_csp_blocks=1,
                use_depthwise=True)
            head_conv_feat = ppdet.modeling.PicoFeat(
                feat_in=neck_out_channels,
                feat_out=neck_out_channels,
                num_fpn_stride=4,
                num_convs=head_num_convs,
                norm_type='bn',
                share_cls_reg=True)
            loss_class = ppdet.modeling.VarifocalLoss(
                use_sigmoid=True, iou_weighted=True, loss_weight=1.0)
            loss_dfl = ppdet.modeling.DistributionFocalLoss(loss_weight=.25)
            loss_bbox = ppdet.modeling.GIoULoss(loss_weight=2.0)
            assigner = ppdet.modeling.SimOTAAssigner(
                candidate_topk=10, iou_weight=6, num_classes=num_classes)
            nms = ppdet.modeling.MultiClassNMS(
                nms_top_k=nms_topk,
                keep_top_k=nms_keep_topk,
                score_threshold=nms_score_threshold,
                nms_threshold=nms_iou_threshold)
            head = ppdet.modeling.PicoHead(
                conv_feat=head_conv_feat,
                num_classes=num_classes,
                fpn_stride=[8, 16, 32, 64],
                prior_prob=0.01,
                reg_max=7,
                cell_offset=.5,
                loss_class=loss_class,
                loss_dfl=loss_dfl,
                loss_bbox=loss_bbox,
                assigner=assigner,
                feat_in_chan=neck_out_channels,
                nms=nms)
            params.update({
                'backbone': backbone,
                'neck': neck,
                'head': head,
            })
        super(PicoDet, self).__init__(
            model_name='PicoDet', num_classes=num_classes, **params)

    def _compose_batch_transform(self, transforms, mode='train'):
        default_batch_transforms = [_BatchPad(pad_to_stride=32)]
        if mode == 'eval':
            collate_batch = True
        else:
            collate_batch = False

        custom_batch_transforms = []
        for i, op in enumerate(transforms.transforms):
            if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
                if mode != 'train':
                    raise ValueError(
                        "{} cannot be present in the {} transforms. ".format(
                            op.__class__.__name__, mode) +
                        "Please check the {} transforms.".format(mode))
                custom_batch_transforms.insert(0, copy.deepcopy(op))

        batch_transforms = BatchCompose(
            custom_batch_transforms + default_batch_transforms,
            collate_batch=collate_batch)
        return batch_transforms

    def _fix_transforms_shape(self, image_shape):
        if getattr(self, 'test_transforms', None):
            has_resize_op = False
            resize_op_idx = -1
            normalize_op_idx = len(self.test_transforms.transforms)
            for idx, op in enumerate(self.test_transforms.transforms):
                name = op.__class__.__name__
                if name == 'Resize':
                    has_resize_op = True
                    resize_op_idx = idx
                if name == 'Normalize':
                    normalize_op_idx = idx

            if not has_resize_op:
                self.test_transforms.transforms.insert(
                    normalize_op_idx,
                    Resize(
                        target_size=image_shape, interp='CUBIC'))
            else:
                self.test_transforms.transforms[
                    resize_op_idx].target_size = image_shape

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, 320, 320]
            if getattr(self, 'test_transforms', None):
                for idx, op in enumerate(self.test_transforms.transforms):
                    name = op.__class__.__name__
                    if name == 'Resize':
                        image_shape = [None, 3] + list(
                            self.test_transforms.transforms[idx].target_size)
            logging.warning(
                '[Important!!!] When exporting an inference model for {}, '
                'if fixed_input_shape is not set, it will be forcibly set to {}. '
                'Please ensure the image shape after transforms is {}; if it is not, '
                'fixed_input_shape should be specified manually.'
                .format(self.__class__.__name__, image_shape, image_shape[1:]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
    def _pre_train(self, in_args):
        optimizer = in_args['optimizer']
        if optimizer is None:
            num_steps_each_epoch = len(in_args['train_dataset']) // in_args[
                'train_batch_size']
            optimizer = self.default_optimizer(
                parameters=self.net.parameters(),
                learning_rate=in_args['learning_rate'],
                warmup_steps=in_args['warmup_steps'],
                warmup_start_lr=in_args['warmup_start_lr'],
                lr_decay_epochs=in_args['lr_decay_epochs'],
                lr_decay_gamma=in_args['lr_decay_gamma'],
                num_steps_each_epoch=num_steps_each_epoch,
                reg_coeff=4e-05,
                scheduler='Cosine',
                num_epochs=in_args['num_epochs'])
            in_args['optimizer'] = optimizer
        return in_args

    def build_data_loader(self, dataset, batch_size, mode='train'):
        if dataset.num_samples < batch_size:
            raise ValueError(
                'The volume of dataset({}) must be larger than batch size({}).'
                .format(dataset.num_samples, batch_size))

        if mode != 'train':
            return paddle.io.DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=dataset.shuffle,
                drop_last=False,
                collate_fn=dataset.batch_transforms,
                num_workers=dataset.num_workers,
                return_list=True,
                use_shared_memory=False)
        else:
            return super(BaseDetector, self).build_data_loader(dataset,
                                                               batch_size, mode)
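
# Illustrative instantiation (not in the original file): the backbone keyword
# selects both the feature extractor and the matching neck/head widths, e.g.
#
#     model = PicoDet(num_classes=10, backbone='ESNet_m')
#
# picks the 1.0x ESNet with a 128-channel CSP-PAN neck and 4 head convs, per
# the branches in __init__ above.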

class YOLOv3(BaseDetector):
    def __init__(self,
                 num_classes=80,
                 backbone='MobileNetV1',
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 ignore_threshold=0.7,
                 nms_score_threshold=0.01,
                 nms_topk=1000,
                 nms_keep_topk=100,
                 nms_iou_threshold=0.45,
                 label_smooth=False,
                 **params):
        self.init_params = locals()
        if backbone not in {
                'MobileNetV1', 'MobileNetV1_ssld', 'MobileNetV3',
                'MobileNetV3_ssld', 'DarkNet53', 'ResNet50_vd_dcn', 'ResNet34'
        }:
            raise ValueError(
                "backbone: {} is not supported. Please choose one of "
                "{{'MobileNetV1', 'MobileNetV1_ssld', 'MobileNetV3', 'MobileNetV3_ssld', 'DarkNet53', "
                "'ResNet50_vd_dcn', 'ResNet34'}}.".format(backbone))
        self.backbone_name = backbone
        if params.get('with_net', True):
            if paddlers.env_info['place'] == 'gpu' and paddlers.env_info[
                    'num'] > 1 and not os.environ.get('PADDLERS_EXPORT_STAGE'):
                norm_type = 'sync_bn'
            else:
                norm_type = 'bn'
            if 'MobileNetV1' in backbone:
                norm_type = 'bn'
                backbone = self._get_backbone('MobileNet', norm_type=norm_type)
            elif 'MobileNetV3' in backbone:
                backbone = self._get_backbone(
                    'MobileNetV3',
                    norm_type=norm_type,
                    feature_maps=[7, 13, 16])
            elif backbone == 'ResNet50_vd_dcn':
                backbone = self._get_backbone(
                    'ResNet',
                    norm_type=norm_type,
                    variant='d',
                    return_idx=[1, 2, 3],
                    dcn_v2_stages=[3],
                    freeze_at=-1,
                    freeze_norm=False)
            elif backbone == 'ResNet34':
                backbone = self._get_backbone(
                    'ResNet',
                    depth=34,
                    norm_type=norm_type,
                    return_idx=[1, 2, 3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)
            else:
                backbone = self._get_backbone('DarkNet', norm_type=norm_type)

            neck = ppdet.modeling.YOLOv3FPN(
                norm_type=norm_type,
                in_channels=[i.channels for i in backbone.out_shape])
            loss = ppdet.modeling.YOLOv3Loss(
                num_classes=num_classes,
                ignore_thresh=ignore_threshold,
                label_smooth=label_smooth)
            yolo_head = ppdet.modeling.YOLOv3Head(
                in_channels=[i.channels for i in neck.out_shape],
                anchors=anchors,
                anchor_masks=anchor_masks,
                num_classes=num_classes,
                loss=loss)
            post_process = ppdet.modeling.BBoxPostProcess(
                decode=ppdet.modeling.YOLOBox(num_classes=num_classes),
                nms=ppdet.modeling.MultiClassNMS(
                    score_threshold=nms_score_threshold,
                    nms_top_k=nms_topk,
                    keep_top_k=nms_keep_topk,
                    nms_threshold=nms_iou_threshold))
            params.update({
                'backbone': backbone,
                'neck': neck,
                'yolo_head': yolo_head,
                'post_process': post_process
            })
        super(YOLOv3, self).__init__(
            model_name='YOLOv3', num_classes=num_classes, **params)
        self.anchors = anchors
        self.anchor_masks = anchor_masks

    def _compose_batch_transform(self, transforms, mode='train'):
        if mode == 'train':
            default_batch_transforms = [
                _BatchPad(pad_to_stride=-1), _NormalizeBox(),
                _PadBox(getattr(self, 'num_max_boxes', 50)), _BboxXYXY2XYWH(),
                _Gt2YoloTarget(
                    anchor_masks=self.anchor_masks,
                    anchors=self.anchors,
                    downsample_ratios=getattr(self, 'downsample_ratios',
                                              [32, 16, 8]),
                    num_classes=self.num_classes)
            ]
        else:
            default_batch_transforms = [_BatchPad(pad_to_stride=-1)]
        if mode == 'eval' and self.metric == 'voc':
            collate_batch = False
        else:
            collate_batch = True

        custom_batch_transforms = []
        for i, op in enumerate(transforms.transforms):
            if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
                if mode != 'train':
                    raise ValueError(
                        "{} cannot be present in the {} transforms. ".format(
                            op.__class__.__name__, mode) +
                        "Please check the {} transforms.".format(mode))
                custom_batch_transforms.insert(0, copy.deepcopy(op))

        batch_transforms = BatchCompose(
            custom_batch_transforms + default_batch_transforms,
            collate_batch=collate_batch)
        return batch_transforms

    def _fix_transforms_shape(self, image_shape):
        if getattr(self, 'test_transforms', None):
            has_resize_op = False
            resize_op_idx = -1
            normalize_op_idx = len(self.test_transforms.transforms)
            for idx, op in enumerate(self.test_transforms.transforms):
                name = op.__class__.__name__
                if name == 'Resize':
                    has_resize_op = True
                    resize_op_idx = idx
                if name == 'Normalize':
                    normalize_op_idx = idx

            if not has_resize_op:
                self.test_transforms.transforms.insert(
                    normalize_op_idx,
                    Resize(
                        target_size=image_shape, interp='CUBIC'))
            else:
                self.test_transforms.transforms[
                    resize_op_idx].target_size = image_shape
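
# Note on YOLOv3 anchors (illustrative): `anchors` are (width, height) pairs in
# pixels at the network input scale, and `anchor_masks` assigns them to feature
# levels from coarse to fine, matching `downsample_ratios` [32, 16, 8]. E.g.
# mask [6, 7, 8] maps the three largest default anchors ([116, 90], [156, 198],
# [373, 326]) to the stride-32 feature map.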
  1009. class FasterRCNN(BaseDetector):
  1010. def __init__(self,
  1011. num_classes=80,
  1012. backbone='ResNet50',
  1013. with_fpn=True,
  1014. with_dcn=False,
  1015. aspect_ratios=[0.5, 1.0, 2.0],
  1016. anchor_sizes=[[32], [64], [128], [256], [512]],
  1017. keep_top_k=100,
  1018. nms_threshold=0.5,
  1019. score_threshold=0.05,
  1020. fpn_num_channels=256,
  1021. rpn_batch_size_per_im=256,
  1022. rpn_fg_fraction=0.5,
  1023. test_pre_nms_top_n=None,
  1024. test_post_nms_top_n=1000,
  1025. **params):
  1026. self.init_params = locals()
  1027. if backbone not in {
  1028. 'ResNet50', 'ResNet50_vd', 'ResNet50_vd_ssld', 'ResNet34',
  1029. 'ResNet34_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18'
  1030. }:
  1031. raise ValueError(
  1032. "backbone: {} is not supported. Please choose one of "
  1033. "{'ResNet50', 'ResNet50_vd', 'ResNet50_vd_ssld', 'ResNet34', 'ResNet34_vd', "
  1034. "'ResNet101', 'ResNet101_vd', 'HRNet_W18'}.".format(backbone))
  1035. self.backbone_name = backbone
  1036. if params.get('with_net', True):
  1037. dcn_v2_stages = [1, 2, 3] if with_dcn else [-1]
  1038. if backbone == 'HRNet_W18':
  1039. if not with_fpn:
  1040. logging.warning(
  1041. "Backbone {} should be used along with fpn enabled, 'with_fpn' is forcibly set to True".
  1042. format(backbone))
  1043. with_fpn = True
  1044. if with_dcn:
  1045. logging.warning(
  1046. "Backbone {} should be used along with dcn disabled, 'with_dcn' is forcibly set to False".
  1047. format(backbone))
  1048. backbone = self._get_backbone(
  1049. 'HRNet', width=18, freeze_at=0, return_idx=[0, 1, 2, 3])
  1050. elif backbone == 'ResNet50_vd_ssld':
  1051. if not with_fpn:
  1052. logging.warning(
  1053. "Backbone {} should be used along with fpn enabled, 'with_fpn' is forcibly set to True".
  1054. format(backbone))
  1055. with_fpn = True
  1056. backbone = self._get_backbone(
  1057. 'ResNet',
  1058. variant='d',
  1059. norm_type='bn',
  1060. freeze_at=0,
  1061. return_idx=[0, 1, 2, 3],
  1062. num_stages=4,
  1063. lr_mult_list=[0.05, 0.05, 0.1, 0.15],
  1064. dcn_v2_stages=dcn_v2_stages)
  1065. elif 'ResNet50' in backbone:
  1066. if with_fpn:
  1067. backbone = self._get_backbone(
  1068. 'ResNet',
  1069. variant='d' if '_vd' in backbone else 'b',
  1070. norm_type='bn',
  1071. freeze_at=0,
  1072. return_idx=[0, 1, 2, 3],
  1073. num_stages=4,
  1074. dcn_v2_stages=dcn_v2_stages)
  1075. else:
  1076. if with_dcn:
  1077. logging.warning(
  1078. "Backbone {} without fpn should be used along with dcn disabled, 'with_dcn' is forcibly set to False".
  1079. format(backbone))
  1080. backbone = self._get_backbone(
  1081. 'ResNet',
  1082. variant='d' if '_vd' in backbone else 'b',
  1083. norm_type='bn',
  1084. freeze_at=0,
  1085. return_idx=[2],
  1086. num_stages=3)
  1087. elif 'ResNet34' in backbone:
  1088. if not with_fpn:
  1089. logging.warning(
  1090. "Backbone {} should be used along with fpn enabled, 'with_fpn' is forcibly set to True".
  1091. format(backbone))
  1092. with_fpn = True
  1093. backbone = self._get_backbone(
  1094. 'ResNet',
  1095. depth=34,
  1096. variant='d' if 'vd' in backbone else 'b',
  1097. norm_type='bn',
  1098. freeze_at=0,
  1099. return_idx=[0, 1, 2, 3],
  1100. num_stages=4,
  1101. dcn_v2_stages=dcn_v2_stages)
  1102. else:
  1103. if not with_fpn:
  1104. logging.warning(
  1105. "Backbone {} should be used along with fpn enabled, 'with_fpn' is forcibly set to True".
  1106. format(backbone))
  1107. with_fpn = True
  1108. backbone = self._get_backbone(
  1109. 'ResNet',
  1110. depth=101,
  1111. variant='d' if 'vd' in backbone else 'b',
  1112. norm_type='bn',
  1113. freeze_at=0,
  1114. return_idx=[0, 1, 2, 3],
  1115. num_stages=4,
  1116. dcn_v2_stages=dcn_v2_stages)
  1117. rpn_in_channel = backbone.out_shape[0].channels
  1118. if with_fpn:
  1119. self.backbone_name = self.backbone_name + '_fpn'
  1120. if 'HRNet' in self.backbone_name:
  1121. neck = ppdet.modeling.HRFPN(
  1122. in_channels=[i.channels for i in backbone.out_shape],
  1123. out_channel=fpn_num_channels,
  1124. spatial_scales=[
  1125. 1.0 / i.stride for i in backbone.out_shape
  1126. ],
  1127. share_conv=False)
  1128. else:
  1129. neck = ppdet.modeling.FPN(
  1130. in_channels=[i.channels for i in backbone.out_shape],
  1131. out_channel=fpn_num_channels,
  1132. spatial_scales=[
  1133. 1.0 / i.stride for i in backbone.out_shape
  1134. ])
  1135. rpn_in_channel = neck.out_shape[0].channels
  1136. anchor_generator_cfg = {
  1137. 'aspect_ratios': aspect_ratios,
  1138. 'anchor_sizes': anchor_sizes,
  1139. 'strides': [4, 8, 16, 32, 64]
  1140. }
  1141. train_proposal_cfg = {
  1142. 'min_size': 0.0,
  1143. 'nms_thresh': .7,
  1144. 'pre_nms_top_n': 2000,
  1145. 'post_nms_top_n': 1000,
  1146. 'topk_after_collect': True
  1147. }
  1148. test_proposal_cfg = {
  1149. 'min_size': 0.0,
  1150. 'nms_thresh': .7,
  1151. 'pre_nms_top_n': 1000
  1152. if test_pre_nms_top_n is None else test_pre_nms_top_n,
  1153. 'post_nms_top_n': test_post_nms_top_n
  1154. }
  1155. head = ppdet.modeling.TwoFCHead(
  1156. in_channel=neck.out_shape[0].channels, out_channel=1024)
  1157. roi_extractor_cfg = {
  1158. 'resolution': 7,
  1159. 'spatial_scale': [1. / i.stride for i in neck.out_shape],
  1160. 'sampling_ratio': 0,
  1161. 'aligned': True
  1162. }
  1163. with_pool = False
  1164. else:
  1165. neck = None
  1166. anchor_generator_cfg = {
  1167. 'aspect_ratios': aspect_ratios,
  1168. 'anchor_sizes': anchor_sizes,
  1169. 'strides': [16]
  1170. }
  1171. train_proposal_cfg = {
  1172. 'min_size': 0.0,
  1173. 'nms_thresh': .7,
  1174. 'pre_nms_top_n': 12000,
  1175. 'post_nms_top_n': 2000,
  1176. 'topk_after_collect': False
  1177. }
  1178. test_proposal_cfg = {
  1179. 'min_size': 0.0,
  1180. 'nms_thresh': .7,
  1181. 'pre_nms_top_n': 6000
  1182. if test_pre_nms_top_n is None else test_pre_nms_top_n,
  1183. 'post_nms_top_n': test_post_nms_top_n
  1184. }
  1185. head = ppdet.modeling.Res5Head()
  1186. roi_extractor_cfg = {
  1187. 'resolution': 14,
  1188. 'spatial_scale':
  1189. [1. / i.stride for i in backbone.out_shape],
  1190. 'sampling_ratio': 0,
  1191. 'aligned': True
  1192. }
  1193. with_pool = True
  1194. rpn_target_assign_cfg = {
  1195. 'batch_size_per_im': rpn_batch_size_per_im,
  1196. 'fg_fraction': rpn_fg_fraction,
  1197. 'negative_overlap': .3,
  1198. 'positive_overlap': .7,
  1199. 'use_random': True
  1200. }
  1201. rpn_head = ppdet.modeling.RPNHead(
  1202. anchor_generator=anchor_generator_cfg,
  1203. rpn_target_assign=rpn_target_assign_cfg,
  1204. train_proposal=train_proposal_cfg,
  1205. test_proposal=test_proposal_cfg,
  1206. in_channel=rpn_in_channel)
  1207. bbox_assigner = BBoxAssigner(num_classes=num_classes)
  1208. bbox_head = ppdet.modeling.BBoxHead(
  1209. head=head,
  1210. in_channel=head.out_shape[0].channels,
  1211. roi_extractor=roi_extractor_cfg,
  1212. with_pool=with_pool,
  1213. bbox_assigner=bbox_assigner,
  1214. num_classes=num_classes)
  1215. bbox_post_process = ppdet.modeling.BBoxPostProcess(
  1216. num_classes=num_classes,
  1217. decode=ppdet.modeling.RCNNBox(num_classes=num_classes),
  1218. nms=ppdet.modeling.MultiClassNMS(
  1219. score_threshold=score_threshold,
  1220. keep_top_k=keep_top_k,
  1221. nms_threshold=nms_threshold))
  1222. params.update({
  1223. 'backbone': backbone,
  1224. 'neck': neck,
  1225. 'rpn_head': rpn_head,
  1226. 'bbox_head': bbox_head,
  1227. 'bbox_post_process': bbox_post_process
  1228. })
  1229. else:
  1230. if backbone not in {'ResNet50', 'ResNet50_vd'}:
  1231. with_fpn = True
  1232. self.with_fpn = with_fpn
  1233. super(FasterRCNN, self).__init__(
  1234. model_name='FasterRCNN', num_classes=num_classes, **params)
  1235. def _pre_train(self, in_args):
  1236. train_dataset = in_args['train_dataset']
  1237. if train_dataset.pos_num < len(train_dataset.file_list):
  1238. # In-place modification
  1239. train_dataset.num_workers = 0
  1240. return in_args
  1241. def _compose_batch_transform(self, transforms, mode='train'):
  1242. if mode == 'train':
  1243. default_batch_transforms = [
  1244. _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
  1245. ]
  1246. else:
  1247. default_batch_transforms = [
  1248. _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
  1249. ]
  1250. custom_batch_transforms = []
  1251. for i, op in enumerate(transforms.transforms):
  1252. if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
  1253. if mode != 'train':
  1254. raise ValueError(
  1255. "{} cannot be present in the {} transforms. ".format(
  1256. op.__class__.__name__, mode) +
  1257. "Please check the {} transforms.".format(mode))
  1258. custom_batch_transforms.insert(0, copy.deepcopy(op))
  1259. batch_transforms = BatchCompose(
  1260. custom_batch_transforms + default_batch_transforms,
  1261. collate_batch=False)
  1262. return batch_transforms
    def _fix_transforms_shape(self, image_shape):
        if getattr(self, 'test_transforms', None):
            has_resize_op = False
            resize_op_idx = -1
            normalize_op_idx = len(self.test_transforms.transforms)
            for idx, op in enumerate(self.test_transforms.transforms):
                name = op.__class__.__name__
                if name == 'ResizeByShort':
                    has_resize_op = True
                    resize_op_idx = idx
                if name == 'Normalize':
                    normalize_op_idx = idx

            if not has_resize_op:
                self.test_transforms.transforms.insert(
                    normalize_op_idx,
                    Resize(
                        target_size=image_shape,
                        keep_ratio=True,
                        interp='CUBIC'))
            else:
                self.test_transforms.transforms[resize_op_idx] = Resize(
                    target_size=image_shape, keep_ratio=True, interp='CUBIC')
            self.test_transforms.transforms.append(
                Pad(im_padding_value=[0., 0., 0.]))

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, -1, -1]
            if self.with_fpn:
                self.test_transforms.transforms.append(
                    Pad(im_padding_value=[0., 0., 0.]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
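
# A minimal usage sketch, not part of the library. It assumes that, as the
# checks above suggest, FasterRCNN accepts `num_classes`, `backbone` and
# `with_fpn` keyword arguments; the concrete values here are illustrative.
def _example_build_faster_rcnn():
    # ResNet50_vd backbones require FPN, so `with_fpn` stays True.
    model = FasterRCNN(num_classes=20, backbone='ResNet50_vd', with_fpn=True)
    # Training would then go through the shared BaseDetector API
    # (e.g. model.train(...); exact signature assumed, see the PaddleRS docs).
    return model
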
class PPYOLO(YOLOv3):
    def __init__(self,
                 num_classes=80,
                 backbone='ResNet50_vd_dcn',
                 anchors=None,
                 anchor_masks=None,
                 use_coord_conv=True,
                 use_iou_aware=True,
                 use_spp=True,
                 use_drop_block=True,
                 scale_x_y=1.05,
                 ignore_threshold=0.7,
                 label_smooth=False,
                 use_iou_loss=True,
                 use_matrix_nms=True,
                 nms_score_threshold=0.01,
                 nms_topk=-1,
                 nms_keep_topk=100,
                 nms_iou_threshold=0.45,
                 **params):
        self.init_params = locals()
        if backbone not in {
                'ResNet50_vd_dcn', 'ResNet18_vd', 'MobileNetV3_large',
                'MobileNetV3_small'
        }:
            # Braces in the candidate set are doubled so that str.format
            # treats them literally.
            raise ValueError(
                "backbone: {} is not supported. Please choose one of "
                "{{'ResNet50_vd_dcn', 'ResNet18_vd', 'MobileNetV3_large', "
                "'MobileNetV3_small'}}.".format(backbone))
        self.backbone_name = backbone
        self.downsample_ratios = [
            32, 16, 8
        ] if backbone == 'ResNet50_vd_dcn' else [32, 16]
        if params.get('with_net', True):
            # Use synchronized BN for multi-GPU training; plain BN otherwise
            # (and always at export time).
            if paddlers.env_info['place'] == 'gpu' and paddlers.env_info[
                    'num'] > 1 and not os.environ.get('PADDLERS_EXPORT_STAGE'):
                norm_type = 'sync_bn'
            else:
                norm_type = 'bn'
            if anchors is None and anchor_masks is None:
                if 'MobileNetV3' in backbone:
                    anchors = [[11, 18], [34, 47], [51, 126], [115, 71],
                               [120, 195], [254, 235]]
                    anchor_masks = [[3, 4, 5], [0, 1, 2]]
                elif backbone == 'ResNet50_vd_dcn':
                    anchors = [[10, 13], [16, 30], [33, 23], [30, 61],
                               [62, 45], [59, 119], [116, 90], [156, 198],
                               [373, 326]]
                    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
                else:
                    anchors = [[10, 14], [23, 27], [37, 58], [81, 82],
                               [135, 169], [344, 319]]
                    anchor_masks = [[3, 4, 5], [0, 1, 2]]
            elif anchors is None or anchor_masks is None:
                raise ValueError(
                    "Please define both anchors and anchor_masks.")
            if backbone == 'ResNet50_vd_dcn':
                backbone = self._get_backbone(
                    'ResNet',
                    variant='d',
                    norm_type=norm_type,
                    return_idx=[1, 2, 3],
                    dcn_v2_stages=[3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)
            elif backbone == 'ResNet18_vd':
                backbone = self._get_backbone(
                    'ResNet',
                    depth=18,
                    variant='d',
                    norm_type=norm_type,
                    return_idx=[2, 3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)
            elif backbone == 'MobileNetV3_large':
                backbone = self._get_backbone(
                    'MobileNetV3',
                    model_name='large',
                    norm_type=norm_type,
                    scale=1,
                    with_extra_blocks=False,
                    extra_block_filters=[],
                    feature_maps=[13, 16])
            elif backbone == 'MobileNetV3_small':
                backbone = self._get_backbone(
                    'MobileNetV3',
                    model_name='small',
                    norm_type=norm_type,
                    scale=1,
                    with_extra_blocks=False,
                    extra_block_filters=[],
                    feature_maps=[9, 12])
            neck = ppdet.modeling.PPYOLOFPN(
                norm_type=norm_type,
                in_channels=[i.channels for i in backbone.out_shape],
                coord_conv=use_coord_conv,
                drop_block=use_drop_block,
                spp=use_spp,
                conv_block_num=0
                if ('MobileNetV3' in self.backbone_name or
                    self.backbone_name == 'ResNet18_vd') else 2)

            loss = ppdet.modeling.YOLOv3Loss(
                num_classes=num_classes,
                ignore_thresh=ignore_threshold,
                downsample=self.downsample_ratios,
                label_smooth=label_smooth,
                scale_x_y=scale_x_y,
                iou_loss=ppdet.modeling.IouLoss(
                    loss_weight=2.5, loss_square=True)
                if use_iou_loss else None,
                iou_aware_loss=ppdet.modeling.IouAwareLoss(loss_weight=1.0)
                if use_iou_aware else None)

            yolo_head = ppdet.modeling.YOLOv3Head(
                in_channels=[i.channels for i in neck.out_shape],
                anchors=anchors,
                anchor_masks=anchor_masks,
                num_classes=num_classes,
                loss=loss,
                iou_aware=use_iou_aware)

            if use_matrix_nms:
                nms = ppdet.modeling.MatrixNMS(
                    keep_top_k=nms_keep_topk,
                    score_threshold=nms_score_threshold,
                    post_threshold=.05
                    if 'MobileNetV3' in self.backbone_name else .01,
                    nms_top_k=nms_topk,
                    background_label=-1)
            else:
                nms = ppdet.modeling.MultiClassNMS(
                    score_threshold=nms_score_threshold,
                    nms_top_k=nms_topk,
                    keep_top_k=nms_keep_topk,
                    nms_threshold=nms_iou_threshold)

            post_process = ppdet.modeling.BBoxPostProcess(
                decode=ppdet.modeling.YOLOBox(
                    num_classes=num_classes,
                    conf_thresh=.005
                    if 'MobileNetV3' in self.backbone_name else .01,
                    scale_x_y=scale_x_y),
                nms=nms)

            params.update({
                'backbone': backbone,
                'neck': neck,
                'yolo_head': yolo_head,
                'post_process': post_process
            })
        # `super(YOLOv3, self)` deliberately skips YOLOv3.__init__ and calls
        # BaseDetector directly; the model name is then relabeled.
        super(YOLOv3, self).__init__(
            model_name='YOLOv3', num_classes=num_classes, **params)
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.model_name = 'PPYOLO'

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, 608, 608]
            if getattr(self, 'test_transforms', None):
                for idx, op in enumerate(self.test_transforms.transforms):
                    name = op.__class__.__name__
                    if name == 'Resize':
                        image_shape = [None, 3] + list(
                            self.test_transforms.transforms[idx].target_size)
            logging.warning(
                '[Important!!!] When exporting inference model for {}, '
                'if fixed_input_shape is not set, it will be forcibly set '
                'to {}. Please ensure the image shape after transforms is '
                '{}; if not, fixed_input_shape should be specified manually.'
                .format(self.__class__.__name__, image_shape, image_shape[1:]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
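
# A minimal construction sketch, not part of the library: with a MobileNetV3
# backbone, PPYOLO automatically selects the two-level anchor defaults above
# and a lighter FPN (conv_block_num=0). `num_classes=20` is arbitrary.
def _example_build_ppyolo():
    model = PPYOLO(num_classes=20, backbone='MobileNetV3_large')
    # Two feature levels for MobileNetV3 backbones.
    assert model.downsample_ratios == [32, 16]
    return model
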
class PPYOLOTiny(YOLOv3):
    def __init__(self,
                 num_classes=80,
                 backbone='MobileNetV3',
                 anchors=[[10, 15], [24, 36], [72, 42], [35, 87],
                          [102, 96], [60, 170], [220, 125], [128, 222],
                          [264, 266]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 use_iou_aware=False,
                 use_spp=True,
                 use_drop_block=True,
                 scale_x_y=1.05,
                 ignore_threshold=0.5,
                 label_smooth=False,
                 use_iou_loss=True,
                 use_matrix_nms=False,
                 nms_score_threshold=0.005,
                 nms_topk=1000,
                 nms_keep_topk=100,
                 nms_iou_threshold=0.45,
                 **params):
        self.init_params = locals()
        if backbone != 'MobileNetV3':
            logging.warning(
                "PPYOLOTiny only supports MobileNetV3 as backbone. "
                "Backbone is forcibly set to MobileNetV3.")
        self.backbone_name = 'MobileNetV3'
        self.downsample_ratios = [32, 16, 8]
        if params.get('with_net', True):
            if paddlers.env_info['place'] == 'gpu' and paddlers.env_info[
                    'num'] > 1 and not os.environ.get('PADDLERS_EXPORT_STAGE'):
                norm_type = 'sync_bn'
            else:
                norm_type = 'bn'
            backbone = self._get_backbone(
                'MobileNetV3',
                model_name='large',
                norm_type=norm_type,
                scale=.5,
                with_extra_blocks=False,
                extra_block_filters=[],
                feature_maps=[7, 13, 16])

            neck = ppdet.modeling.PPYOLOTinyFPN(
                detection_block_channels=[160, 128, 96],
                in_channels=[i.channels for i in backbone.out_shape],
                spp=use_spp,
                drop_block=use_drop_block)

            loss = ppdet.modeling.YOLOv3Loss(
                num_classes=num_classes,
                ignore_thresh=ignore_threshold,
                downsample=self.downsample_ratios,
                label_smooth=label_smooth,
                scale_x_y=scale_x_y,
                iou_loss=ppdet.modeling.IouLoss(
                    loss_weight=2.5, loss_square=True)
                if use_iou_loss else None,
                iou_aware_loss=ppdet.modeling.IouAwareLoss(loss_weight=1.0)
                if use_iou_aware else None)

            yolo_head = ppdet.modeling.YOLOv3Head(
                in_channels=[i.channels for i in neck.out_shape],
                anchors=anchors,
                anchor_masks=anchor_masks,
                num_classes=num_classes,
                loss=loss,
                iou_aware=use_iou_aware)

            if use_matrix_nms:
                nms = ppdet.modeling.MatrixNMS(
                    keep_top_k=nms_keep_topk,
                    score_threshold=nms_score_threshold,
                    post_threshold=.05,
                    nms_top_k=nms_topk,
                    background_label=-1)
            else:
                nms = ppdet.modeling.MultiClassNMS(
                    score_threshold=nms_score_threshold,
                    nms_top_k=nms_topk,
                    keep_top_k=nms_keep_topk,
                    nms_threshold=nms_iou_threshold)

            post_process = ppdet.modeling.BBoxPostProcess(
                decode=ppdet.modeling.YOLOBox(
                    num_classes=num_classes,
                    conf_thresh=.005,
                    downsample_ratio=32,
                    clip_bbox=True,
                    scale_x_y=scale_x_y),
                nms=nms)

            params.update({
                'backbone': backbone,
                'neck': neck,
                'yolo_head': yolo_head,
                'post_process': post_process
            })
        super(YOLOv3, self).__init__(
            model_name='YOLOv3', num_classes=num_classes, **params)
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.model_name = 'PPYOLOTiny'

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, 320, 320]
            if getattr(self, 'test_transforms', None):
                for idx, op in enumerate(self.test_transforms.transforms):
                    name = op.__class__.__name__
                    if name == 'Resize':
                        image_shape = [None, 3] + list(
                            self.test_transforms.transforms[idx].target_size)
            logging.warning(
                '[Important!!!] When exporting inference model for {}, '
                'if fixed_input_shape is not set, it will be forcibly set '
                'to {}. Please ensure the image shape after transforms is '
                '{}; if not, fixed_input_shape should be specified manually.'
                .format(self.__class__.__name__, image_shape, image_shape[1:]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
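
# Construction sketch, not part of the library: PPYOLOTiny always uses a
# 0.5x MobileNetV3-large backbone over three feature levels, and exports at
# 320x320 unless a fixed input shape is given.
def _example_build_ppyolo_tiny():
    model = PPYOLOTiny(num_classes=20)
    assert model.downsample_ratios == [32, 16, 8]
    return model
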
class PPYOLOv2(YOLOv3):
    def __init__(self,
                 num_classes=80,
                 backbone='ResNet50_vd_dcn',
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61],
                          [62, 45], [59, 119], [116, 90], [156, 198],
                          [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 use_iou_aware=True,
                 use_spp=True,
                 use_drop_block=True,
                 scale_x_y=1.05,
                 ignore_threshold=0.7,
                 label_smooth=False,
                 use_iou_loss=True,
                 use_matrix_nms=True,
                 nms_score_threshold=0.01,
                 nms_topk=-1,
                 nms_keep_topk=100,
                 nms_iou_threshold=0.45,
                 **params):
        self.init_params = locals()
        if backbone not in {'ResNet50_vd_dcn', 'ResNet101_vd_dcn'}:
            # Braces in the candidate set are doubled so that str.format
            # treats them literally.
            raise ValueError(
                "backbone: {} is not supported. Please choose one of "
                "{{'ResNet50_vd_dcn', 'ResNet101_vd_dcn'}}.".format(backbone))
        self.backbone_name = backbone
        self.downsample_ratios = [32, 16, 8]
        if params.get('with_net', True):
            if paddlers.env_info['place'] == 'gpu' and paddlers.env_info[
                    'num'] > 1 and not os.environ.get('PADDLERS_EXPORT_STAGE'):
                norm_type = 'sync_bn'
            else:
                norm_type = 'bn'
            if backbone == 'ResNet50_vd_dcn':
                backbone = self._get_backbone(
                    'ResNet',
                    variant='d',
                    norm_type=norm_type,
                    return_idx=[1, 2, 3],
                    dcn_v2_stages=[3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)
            elif backbone == 'ResNet101_vd_dcn':
                backbone = self._get_backbone(
                    'ResNet',
                    depth=101,
                    variant='d',
                    norm_type=norm_type,
                    return_idx=[1, 2, 3],
                    dcn_v2_stages=[3],
                    freeze_at=-1,
                    freeze_norm=False,
                    norm_decay=0.)

            neck = ppdet.modeling.PPYOLOPAN(
                norm_type=norm_type,
                in_channels=[i.channels for i in backbone.out_shape],
                drop_block=use_drop_block,
                block_size=3,
                keep_prob=.9,
                spp=use_spp)

            loss = ppdet.modeling.YOLOv3Loss(
                num_classes=num_classes,
                ignore_thresh=ignore_threshold,
                downsample=self.downsample_ratios,
                label_smooth=label_smooth,
                scale_x_y=scale_x_y,
                iou_loss=ppdet.modeling.IouLoss(
                    loss_weight=2.5, loss_square=True)
                if use_iou_loss else None,
                iou_aware_loss=ppdet.modeling.IouAwareLoss(loss_weight=1.0)
                if use_iou_aware else None)

            yolo_head = ppdet.modeling.YOLOv3Head(
                in_channels=[i.channels for i in neck.out_shape],
                anchors=anchors,
                anchor_masks=anchor_masks,
                num_classes=num_classes,
                loss=loss,
                iou_aware=use_iou_aware,
                iou_aware_factor=.5)

            if use_matrix_nms:
                nms = ppdet.modeling.MatrixNMS(
                    keep_top_k=nms_keep_topk,
                    score_threshold=nms_score_threshold,
                    post_threshold=.01,
                    nms_top_k=nms_topk,
                    background_label=-1)
            else:
                nms = ppdet.modeling.MultiClassNMS(
                    score_threshold=nms_score_threshold,
                    nms_top_k=nms_topk,
                    keep_top_k=nms_keep_topk,
                    nms_threshold=nms_iou_threshold)

            post_process = ppdet.modeling.BBoxPostProcess(
                decode=ppdet.modeling.YOLOBox(
                    num_classes=num_classes,
                    conf_thresh=.01,
                    downsample_ratio=32,
                    clip_bbox=True,
                    scale_x_y=scale_x_y),
                nms=nms)

            params.update({
                'backbone': backbone,
                'neck': neck,
                'yolo_head': yolo_head,
                'post_process': post_process
            })
        super(YOLOv3, self).__init__(
            model_name='YOLOv3', num_classes=num_classes, **params)
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.model_name = 'PPYOLOv2'

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, 640, 640]
            if getattr(self, 'test_transforms', None):
                for idx, op in enumerate(self.test_transforms.transforms):
                    name = op.__class__.__name__
                    if name == 'Resize':
                        image_shape = [None, 3] + list(
                            self.test_transforms.transforms[idx].target_size)
            logging.warning(
                '[Important!!!] When exporting inference model for {}, '
                'if fixed_input_shape is not set, it will be forcibly set '
                'to {}. Please ensure the image shape after transforms is '
                '{}; if not, fixed_input_shape should be specified manually.'
                .format(self.__class__.__name__, image_shape, image_shape[1:]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
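
# Construction sketch, not part of the library: PPYOLOv2 swaps PPYOLO's FPN
# for a PAN neck (PPYOLOPAN) and enables IoU-aware scoring by default; only
# the two ResNet-vd-DCN backbones are accepted.
def _example_build_ppyolov2():
    model = PPYOLOv2(num_classes=20, backbone='ResNet101_vd_dcn')
    assert model.model_name == 'PPYOLOv2'
    return model
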
class MaskRCNN(BaseDetector):
    def __init__(self,
                 num_classes=80,
                 backbone='ResNet50_vd',
                 with_fpn=True,
                 with_dcn=False,
                 aspect_ratios=[0.5, 1.0, 2.0],
                 anchor_sizes=[[32], [64], [128], [256], [512]],
                 keep_top_k=100,
                 nms_threshold=0.5,
                 score_threshold=0.05,
                 fpn_num_channels=256,
                 rpn_batch_size_per_im=256,
                 rpn_fg_fraction=0.5,
                 test_pre_nms_top_n=None,
                 test_post_nms_top_n=1000,
                 **params):
        self.init_params = locals()
        if backbone not in {
                'ResNet50', 'ResNet50_vd', 'ResNet50_vd_ssld', 'ResNet101',
                'ResNet101_vd'
        }:
            # Braces in the candidate set are doubled so that str.format
            # treats them literally.
            raise ValueError(
                "backbone: {} is not supported. Please choose one of "
                "{{'ResNet50', 'ResNet50_vd', 'ResNet50_vd_ssld', "
                "'ResNet101', 'ResNet101_vd'}}.".format(backbone))
        self.backbone_name = backbone + '_fpn' if with_fpn else backbone
        dcn_v2_stages = [1, 2, 3] if with_dcn else [-1]
        if params.get('with_net', True):
            if backbone == 'ResNet50':
                if with_fpn:
                    backbone = self._get_backbone(
                        'ResNet',
                        norm_type='bn',
                        freeze_at=0,
                        return_idx=[0, 1, 2, 3],
                        num_stages=4,
                        dcn_v2_stages=dcn_v2_stages)
                else:
                    if with_dcn:
                        logging.warning(
                            "Backbone {} should be used with DCN disabled; "
                            "'with_dcn' is forcibly set to False.".format(
                                backbone))
                    backbone = self._get_backbone(
                        'ResNet',
                        norm_type='bn',
                        freeze_at=0,
                        return_idx=[2],
                        num_stages=3)
            elif 'ResNet50_vd' in backbone:
                if not with_fpn:
                    logging.warning(
                        "Backbone {} should be used with FPN enabled; "
                        "'with_fpn' is forcibly set to True.".format(
                            backbone))
                    with_fpn = True
                backbone = self._get_backbone(
                    'ResNet',
                    variant='d',
                    norm_type='bn',
                    freeze_at=0,
                    return_idx=[0, 1, 2, 3],
                    num_stages=4,
                    lr_mult_list=[0.05, 0.05, 0.1, 0.15]
                    if '_ssld' in backbone else [1.0, 1.0, 1.0, 1.0],
                    dcn_v2_stages=dcn_v2_stages)
            else:
                if not with_fpn:
                    logging.warning(
                        "Backbone {} should be used with FPN enabled; "
                        "'with_fpn' is forcibly set to True.".format(
                            backbone))
                    with_fpn = True
                backbone = self._get_backbone(
                    'ResNet',
                    variant='d' if '_vd' in backbone else 'b',
                    depth=101,
                    norm_type='bn',
                    freeze_at=0,
                    return_idx=[0, 1, 2, 3],
                    num_stages=4,
                    dcn_v2_stages=dcn_v2_stages)
            rpn_in_channel = backbone.out_shape[0].channels
            if with_fpn:
                neck = ppdet.modeling.FPN(
                    in_channels=[i.channels for i in backbone.out_shape],
                    out_channel=fpn_num_channels,
                    spatial_scales=[
                        1.0 / i.stride for i in backbone.out_shape
                    ])
                rpn_in_channel = neck.out_shape[0].channels
                anchor_generator_cfg = {
                    'aspect_ratios': aspect_ratios,
                    'anchor_sizes': anchor_sizes,
                    'strides': [4, 8, 16, 32, 64]
                }
                train_proposal_cfg = {
                    'min_size': 0.0,
                    'nms_thresh': .7,
                    'pre_nms_top_n': 2000,
                    'post_nms_top_n': 1000,
                    'topk_after_collect': True
                }
                test_proposal_cfg = {
                    'min_size': 0.0,
                    'nms_thresh': .7,
                    'pre_nms_top_n': 1000
                    if test_pre_nms_top_n is None else test_pre_nms_top_n,
                    'post_nms_top_n': test_post_nms_top_n
                }
                bb_head = ppdet.modeling.TwoFCHead(
                    in_channel=neck.out_shape[0].channels, out_channel=1024)
                bb_roi_extractor_cfg = {
                    'resolution': 7,
                    'spatial_scale': [1. / i.stride for i in neck.out_shape],
                    'sampling_ratio': 0,
                    'aligned': True
                }
                with_pool = False
                m_head = ppdet.modeling.MaskFeat(
                    in_channel=neck.out_shape[0].channels,
                    out_channel=256,
                    num_convs=4)
                m_roi_extractor_cfg = {
                    'resolution': 14,
                    'spatial_scale': [1. / i.stride for i in neck.out_shape],
                    'sampling_ratio': 0,
                    'aligned': True
                }
                mask_assigner = MaskAssigner(
                    num_classes=num_classes, mask_resolution=28)
                share_bbox_feat = False
            else:
                neck = None
                anchor_generator_cfg = {
                    'aspect_ratios': aspect_ratios,
                    'anchor_sizes': anchor_sizes,
                    'strides': [16]
                }
                train_proposal_cfg = {
                    'min_size': 0.0,
                    'nms_thresh': .7,
                    'pre_nms_top_n': 12000,
                    'post_nms_top_n': 2000,
                    'topk_after_collect': False
                }
                test_proposal_cfg = {
                    'min_size': 0.0,
                    'nms_thresh': .7,
                    'pre_nms_top_n': 6000
                    if test_pre_nms_top_n is None else test_pre_nms_top_n,
                    'post_nms_top_n': test_post_nms_top_n
                }
                bb_head = ppdet.modeling.Res5Head()
                bb_roi_extractor_cfg = {
                    'resolution': 14,
                    'spatial_scale':
                    [1. / i.stride for i in backbone.out_shape],
                    'sampling_ratio': 0,
                    'aligned': True
                }
                with_pool = True
                m_head = ppdet.modeling.MaskFeat(
                    in_channel=bb_head.out_shape[0].channels,
                    out_channel=256,
                    num_convs=0)
                m_roi_extractor_cfg = {
                    'resolution': 14,
                    'spatial_scale':
                    [1. / i.stride for i in backbone.out_shape],
                    'sampling_ratio': 0,
                    'aligned': True
                }
                mask_assigner = MaskAssigner(
                    num_classes=num_classes, mask_resolution=14)
                share_bbox_feat = True
            rpn_target_assign_cfg = {
                'batch_size_per_im': rpn_batch_size_per_im,
                'fg_fraction': rpn_fg_fraction,
                'negative_overlap': .3,
                'positive_overlap': .7,
                'use_random': True
            }
            rpn_head = ppdet.modeling.RPNHead(
                anchor_generator=anchor_generator_cfg,
                rpn_target_assign=rpn_target_assign_cfg,
                train_proposal=train_proposal_cfg,
                test_proposal=test_proposal_cfg,
                in_channel=rpn_in_channel)
            bbox_assigner = BBoxAssigner(num_classes=num_classes)
            bbox_head = ppdet.modeling.BBoxHead(
                head=bb_head,
                in_channel=bb_head.out_shape[0].channels,
                roi_extractor=bb_roi_extractor_cfg,
                with_pool=with_pool,
                bbox_assigner=bbox_assigner,
                num_classes=num_classes)
            mask_head = ppdet.modeling.MaskHead(
                head=m_head,
                roi_extractor=m_roi_extractor_cfg,
                mask_assigner=mask_assigner,
                share_bbox_feat=share_bbox_feat,
                num_classes=num_classes)
            bbox_post_process = ppdet.modeling.BBoxPostProcess(
                num_classes=num_classes,
                decode=ppdet.modeling.RCNNBox(num_classes=num_classes),
                nms=ppdet.modeling.MultiClassNMS(
                    score_threshold=score_threshold,
                    keep_top_k=keep_top_k,
                    nms_threshold=nms_threshold))
            mask_post_process = ppdet.modeling.MaskPostProcess(
                binary_thresh=.5)
            params.update({
                'backbone': backbone,
                'neck': neck,
                'rpn_head': rpn_head,
                'bbox_head': bbox_head,
                'mask_head': mask_head,
                'bbox_post_process': bbox_post_process,
                'mask_post_process': mask_post_process
            })

        self.with_fpn = with_fpn
        super(MaskRCNN, self).__init__(
            model_name='MaskRCNN', num_classes=num_classes, **params)

    def _pre_train(self, in_args):
        train_dataset = in_args['train_dataset']
        if train_dataset.pos_num < len(train_dataset.file_list):
            # The dataset contains negative (background-only) samples;
            # disable multi-process data loading (in-place modification).
            train_dataset.num_workers = 0
        return in_args

    def _compose_batch_transform(self, transforms, mode='train'):
        # Training and inference currently share the same default batch
        # transform: pad each batch to a multiple of the FPN stride.
        default_batch_transforms = [
            _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
        ]
        custom_batch_transforms = []
        for op in transforms.transforms:
            if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
                if mode != 'train':
                    raise ValueError(
                        "{} cannot be present in the {} transforms. ".format(
                            op.__class__.__name__, mode) +
                        "Please check the {} transforms.".format(mode))
                custom_batch_transforms.insert(0, copy.deepcopy(op))
        batch_transforms = BatchCompose(
            custom_batch_transforms + default_batch_transforms,
            collate_batch=False)
        return batch_transforms

    def _fix_transforms_shape(self, image_shape):
        if getattr(self, 'test_transforms', None):
            has_resize_op = False
            resize_op_idx = -1
            normalize_op_idx = len(self.test_transforms.transforms)
            for idx, op in enumerate(self.test_transforms.transforms):
                name = op.__class__.__name__
                if name == 'ResizeByShort':
                    has_resize_op = True
                    resize_op_idx = idx
                if name == 'Normalize':
                    normalize_op_idx = idx

            if not has_resize_op:
                self.test_transforms.transforms.insert(
                    normalize_op_idx,
                    Resize(
                        target_size=image_shape,
                        keep_ratio=True,
                        interp='CUBIC'))
            else:
                self.test_transforms.transforms[resize_op_idx] = Resize(
                    target_size=image_shape, keep_ratio=True, interp='CUBIC')
            self.test_transforms.transforms.append(
                Pad(im_padding_value=[0., 0., 0.]))

    def _get_test_inputs(self, image_shape):
        if image_shape is not None:
            image_shape = self._check_image_shape(image_shape)
            self._fix_transforms_shape(image_shape[-2:])
        else:
            image_shape = [None, 3, -1, -1]
            if self.with_fpn:
                self.test_transforms.transforms.append(
                    Pad(im_padding_value=[0., 0., 0.]))
        self.fixed_input_shape = image_shape
        return self._define_input_spec(image_shape)
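
# Construction sketch, not part of the library: a Mask R-CNN with the default
# ResNet50_vd + FPN configuration. As enforced above, disabling FPN is only
# honored for the plain 'ResNet50' backbone.
def _example_build_mask_rcnn():
    model = MaskRCNN(num_classes=20, backbone='ResNet50_vd', with_fpn=True)
    assert model.backbone_name == 'ResNet50_vd_fpn'
    return model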