infer.py

#!/usr/bin/env python

import os
import os.path as osp
import argparse
from operator import itemgetter

import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from paddle.inference import PrecisionType
from paddlers.tasks import load_model
from paddlers.utils import logging


class _bool(object):
    """Coerce a command-line string such as 'true' or 'False' to a bool."""

    def __new__(cls, x):
        if isinstance(x, str):
            if x.lower() == 'false':
                return False
            elif x.lower() == 'true':
                return True
        return bool(x)
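
# Illustrative parsing behavior of _bool (not executed):
#   _bool('False') -> False    _bool('true') -> True    _bool(1) -> True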


class TIPCPredictor(object):
    def __init__(self,
                 model_dir,
                 device='cpu',
                 gpu_id=0,
                 cpu_thread_num=1,
                 use_mkl=True,
                 mkl_thread_num=4,
                 use_trt=False,
                 memory_optimize=True,
                 trt_precision_mode='fp32',
                 benchmark=False,
                 model_name='',
                 batch_size=1):
        self.model_dir = model_dir
        # Load the model wrapper (transforms, labels, model type) without
        # building the network; inference runs on the exported static graph.
        self._model = load_model(model_dir, with_net=False)

        if trt_precision_mode.lower() == 'fp32':
            trt_precision_mode = PrecisionType.Float32
        elif trt_precision_mode.lower() == 'fp16':
            trt_precision_mode = PrecisionType.Float16
        else:
            logging.error(
                "TensorRT precision mode {} is invalid. Supported modes are fp32 and fp16."
                .format(trt_precision_mode),
                exit=True)

        self.config = self.get_config(
            device=device,
            gpu_id=gpu_id,
            cpu_thread_num=cpu_thread_num,
            use_mkl=use_mkl,
            mkl_thread_num=mkl_thread_num,
            use_trt=use_trt,
            use_glog=False,
            memory_optimize=memory_optimize,
            max_trt_batch_size=1,
            trt_precision_mode=trt_precision_mode)
        self.predictor = create_predictor(self.config)
        self.batch_size = batch_size

        if benchmark:
            # auto_log is an optional dependency, imported only when
            # benchmarking is requested.
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name=model_name,
                model_precision=trt_precision_mode,
                batch_size=batch_size,
                data_shape='dynamic',
                save_path=None,
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logging)
        self.benchmark = benchmark

    def get_config(self, device, gpu_id, cpu_thread_num, use_mkl,
                   mkl_thread_num, use_trt, use_glog, memory_optimize,
                   max_trt_batch_size, trt_precision_mode):
        config = Config(
            osp.join(self.model_dir, 'model.pdmodel'),
            osp.join(self.model_dir, 'model.pdiparams'))

        if device == 'gpu':
            # Reserve an initial 200 MB GPU memory pool on the selected card.
            config.enable_use_gpu(200, gpu_id)
            config.switch_ir_optim(True)
            if use_trt:
                if self._model.model_type == 'segmenter':
                    logging.warning(
                        "Semantic segmentation models do not support TensorRT "
                        "acceleration, so TensorRT is forcibly disabled.")
                elif 'RCNN' in self._model.__class__.__name__:
                    logging.warning(
                        "RCNN models do not support TensorRT acceleration, "
                        "so TensorRT is forcibly disabled.")
                else:
                    config.enable_tensorrt_engine(
                        workspace_size=1 << 10,
                        max_batch_size=max_trt_batch_size,
                        min_subgraph_size=3,
                        precision_mode=trt_precision_mode,
                        use_static=False,
                        use_calib_mode=False)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_thread_num)
            if use_mkl:
                if self._model.__class__.__name__ == 'MaskRCNN':
                    logging.warning(
                        "MaskRCNN does not support MKL-DNN, so MKL-DNN is "
                        "forcibly disabled.")
                else:
                    try:
                        # Cache up to 10 input shapes for MKL-DNN to avoid a
                        # memory leak with variable-shape inputs.
                        config.set_mkldnn_cache_capacity(10)
                        config.enable_mkldnn()
                        config.set_cpu_math_library_num_threads(mkl_thread_num)
                    except Exception:
                        logging.warning(
                            "The current environment does not support MKL-DNN, "
                            "so MKL-DNN is disabled.")

        if not use_glog:
            config.disable_glog_info()
        if memory_optimize:
            config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        return config

    def preprocess(self, images, transforms):
        preprocessed_samples = self._model.preprocess(
            images, transforms, to_tensor=False)
        # Organize the preprocessed data into a dict: keys matching the
        # exported model's input names are fed to the predictor, while
        # 'ori_shape' is kept for postprocessing.
        if self._model.model_type == 'classifier':
            preprocessed_samples = {'image': preprocessed_samples[0]}
        elif self._model.model_type == 'segmenter':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'ori_shape': preprocessed_samples[1]
            }
        elif self._model.model_type == 'detector':
            pass
        elif self._model.model_type == 'change_detector':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'image2': preprocessed_samples[1],
                'ori_shape': preprocessed_samples[2]
            }
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)
        return preprocessed_samples

    def postprocess(self, net_outputs, topk=1, ori_shape=None,
                    transforms=None):
        if self._model.model_type == 'classifier':
            # Do not request more classes than the model actually has.
            true_topk = min(self._model.num_classes, topk)
            if self._model.postprocess is None:
                self._model.build_postprocess_from_labels(true_topk)
            # XXX: Convert ndarray to tensor as self._model.postprocess
            # requires.
            assert len(net_outputs) == 1
            net_outputs = paddle.to_tensor(net_outputs[0])
            outputs = self._model.postprocess(net_outputs)
            class_ids = map(itemgetter('class_ids'), outputs)
            scores = map(itemgetter('scores'), outputs)
            label_names = map(itemgetter('label_names'), outputs)
            preds = [{
                'class_ids_map': l,
                'scores_map': s,
                'label_names_map': n,
            } for l, s, n in zip(class_ids, scores, label_names)]
        elif self._model.model_type in ('segmenter', 'change_detector'):
            label_map, score_map = self._model.postprocess(
                net_outputs,
                batch_origin_shape=ori_shape,
                transforms=transforms.transforms)
            preds = [{
                'label_map': l,
                'score_map': s
            } for l, s in zip(label_map, score_map)]
        elif self._model.model_type == 'detector':
            net_outputs = {
                k: v
                for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
            }
            preds = self._model.postprocess(net_outputs)
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)
        return preds

    def _run(self, images, topk=1, transforms=None, time_it=False):
        if self.benchmark and time_it:
            self.autolog.times.start()

        preprocessed_input = self.preprocess(images, transforms)

        input_names = self.predictor.get_input_names()
        for name in input_names:
            input_tensor = self.predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(preprocessed_input[name])

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        self.predictor.run()

        output_names = self.predictor.get_output_names()
        net_outputs = []
        for name in output_names:
            output_tensor = self.predictor.get_output_handle(name)
            net_outputs.append(output_tensor.copy_to_cpu())

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        res = self.postprocess(
            net_outputs,
            topk,
            ori_shape=preprocessed_input.get('ori_shape', None),
            transforms=transforms)

        if self.benchmark and time_it:
            self.autolog.times.end(stamp=True)

        return res

    def predict(self, data_dir, file_list, topk=1, warmup_iters=5):
        transforms = self._model.test_transforms

        # Warm up: run `warmup_iters` untimed batches first, cycling through
        # the file list again if it holds fewer than `warmup_iters` batches.
        iters = 0
        while True:
            for images in self._parse_lines(data_dir, file_list):
                if iters >= warmup_iters:
                    break
                self._run(
                    images=images,
                    topk=topk,
                    transforms=transforms,
                    time_it=False)
                iters += 1
            else:
                continue
            break

        results = []
        for images in self._parse_lines(data_dir, file_list):
            res = self._run(
                images=images, topk=topk, transforms=transforms, time_it=True)
            results.append(res)
        return results

    def _parse_lines(self, data_dir, file_list):
        with open(file_list, 'r') as f:
            batch = []
            for line in f:
                items = line.strip().split()
                items = [osp.join(data_dir, item) for item in items]
                if self._model.model_type == 'change_detector':
                    batch.append((items[0], items[1]))
                else:
                    batch.append(items[0])
                if len(batch) == self.batch_size:
                    yield batch
                    # Rebind rather than clear, so the list just yielded to
                    # the caller is not mutated afterwards.
                    batch = []
            # Yield the final, possibly incomplete batch.
            if 0 < len(batch) < self.batch_size:
                yield batch
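
# Each line of the file list holds whitespace-separated image paths relative
# to `data_dir`, one sample per line. Illustrative (hypothetical) lines:
#   change detection:  A/img_0001.png B/img_0001.png
#   other tasks:       images/img_0001.png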


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_list', type=str, nargs=2)
    parser.add_argument('--model_dir', type=str, default='./')
    parser.add_argument(
        '--device', type=str, choices=['cpu', 'gpu'], default='cpu')
    parser.add_argument('--enable_mkldnn', type=_bool, default=False)
    parser.add_argument('--cpu_threads', type=int, default=10)
    parser.add_argument('--use_trt', type=_bool, default=False)
    parser.add_argument(
        '--precision', type=str, choices=['fp32', 'fp16'], default='fp16')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--benchmark', type=_bool, default=False)
    parser.add_argument('--model_name', type=str, default='')
    args = parser.parse_args()

    predictor = TIPCPredictor(
        args.model_dir,
        device=args.device,
        cpu_thread_num=args.cpu_threads,
        use_mkl=args.enable_mkldnn,
        mkl_thread_num=args.cpu_threads,
        use_trt=args.use_trt,
        trt_precision_mode=args.precision,
        benchmark=args.benchmark,
        model_name=args.model_name,
        batch_size=args.batch_size)

    predictor.predict(args.file_list[0], args.file_list[1])

    if args.benchmark:
        predictor.autolog.report()
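
# Example invocation (a sketch; the model directory, data directory, and list
# file below are hypothetical and depend on how the model was exported):
#   python infer.py --model_dir ./inference_model \
#       --file_list ./test_data ./test_data/list.txt \
#       --device gpu --use_trt True --precision fp16 \
#       --batch_size 1 --benchmark False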