#!/usr/bin/env python

import os
import os.path as osp
import argparse
from operator import itemgetter

import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from paddle.inference import PrecisionType
from paddlers.tasks import load_model
from paddlers.utils import logging

from config_utils import parse_configs
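

# argparse's `type=bool` treats any non-empty string (including "False") as
# True, so boolean CLI flags are parsed with this small helper instead.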
class _bool(object):
    def __new__(cls, x):
        if isinstance(x, str):
            if x.lower() == 'false':
                return False
            elif x.lower() == 'true':
                return True
        # Fall back to Python's own truthiness for non-string input.
        # (`bool.__new__(x)` was a bug: `__new__` needs the class as its
        # first argument.)
        return bool(x)


class TIPCPredictor(object):
    def __init__(self,
                 model_dir,
                 device='cpu',
                 gpu_id=0,
                 cpu_thread_num=1,
                 use_mkl=True,
                 mkl_thread_num=4,
                 use_trt=False,
                 memory_optimize=True,
                 trt_precision_mode='fp32',
                 benchmark=False,
                 model_name='',
                 batch_size=1):
        self.model_dir = model_dir
        self._model = load_model(model_dir, with_net=False)

        if trt_precision_mode.lower() == 'fp32':
            trt_precision_mode = PrecisionType.Float32
        elif trt_precision_mode.lower() == 'fp16':
            trt_precision_mode = PrecisionType.Float16
        else:
            logging.error(
                "TensorRT precision mode {} is invalid. Supported modes are "
                "fp32 and fp16.".format(trt_precision_mode),
                exit=True)

        self.config = self.get_config(
            device=device,
            gpu_id=gpu_id,
            cpu_thread_num=cpu_thread_num,
            use_mkl=use_mkl,
            mkl_thread_num=mkl_thread_num,
            use_trt=use_trt,
            use_glog=False,
            memory_optimize=memory_optimize,
            max_trt_batch_size=1,
            trt_precision_mode=trt_precision_mode)
        self.predictor = create_predictor(self.config)
        self.batch_size = batch_size

        if benchmark:
            import auto_log
            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name=model_name,
                model_precision=trt_precision_mode,
                batch_size=batch_size,
                data_shape='dynamic',
                save_path=None,
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logging)
        self.benchmark = benchmark
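
    # Build the paddle.inference.Config: GPU (optionally with a TensorRT
    # subgraph engine) or CPU (optionally with MKL-DNN), plus the glog and
    # memory-optimization switches.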
    def get_config(self, device, gpu_id, cpu_thread_num, use_mkl,
                   mkl_thread_num, use_trt, use_glog, memory_optimize,
                   max_trt_batch_size, trt_precision_mode):
        config = Config(
            osp.join(self.model_dir, 'model.pdmodel'),
            osp.join(self.model_dir, 'model.pdiparams'))

        if device == 'gpu':
            config.enable_use_gpu(200, gpu_id)
            config.switch_ir_optim(True)
            if use_trt:
                if self._model.model_type == 'segmenter':
                    logging.warning(
                        "Semantic segmentation models do not support TensorRT "
                        "acceleration. TensorRT is forcibly disabled.")
                elif self._model.model_type == 'detector' and 'RCNN' in self._model.__class__.__name__:
                    logging.warning(
                        "RCNN models do not support TensorRT acceleration. "
                        "TensorRT is forcibly disabled.")
                else:
                    config.enable_tensorrt_engine(
                        workspace_size=1 << 10,
                        max_batch_size=max_trt_batch_size,
                        min_subgraph_size=3,
                        precision_mode=trt_precision_mode,
                        use_static=False,
                        use_calib_mode=False)
        else:
            config.disable_gpu()
            config.set_cpu_math_library_num_threads(cpu_thread_num)
            if use_mkl:
                if self._model.__class__.__name__ == 'MaskRCNN':
                    logging.warning(
                        "MaskRCNN does not support MKL-DNN. MKL-DNN is "
                        "forcibly disabled.")
                else:
                    try:
                        # Cache 10 different shapes for MKL-DNN to avoid a
                        # memory leak.
                        config.set_mkldnn_cache_capacity(10)
                        config.enable_mkldnn()
                        config.set_cpu_math_library_num_threads(mkl_thread_num)
                    except Exception:
                        logging.warning(
                            "The current environment does not support MKL-DNN. "
                            "MKL-DNN is disabled.")

        if not use_glog:
            config.disable_glog_info()
        if memory_optimize:
            config.enable_memory_optim()
        config.switch_use_feed_fetch_ops(False)
        return config
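
    # Map the model-type-specific tuple returned by PaddleRS preprocessing
    # onto the named input tensors the exported inference model expects.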
    def preprocess(self, images, transforms):
        preprocessed_samples = self._model.preprocess(
            images, transforms, to_tensor=False)
        if self._model.model_type == 'classifier':
            preprocessed_samples = {'image': preprocessed_samples[0]}
        elif self._model.model_type == 'segmenter':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'ori_shape': preprocessed_samples[1]
            }
        elif self._model.model_type == 'detector':
            pass
        elif self._model.model_type == 'change_detector':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'image2': preprocessed_samples[1],
                'ori_shape': preprocessed_samples[2]
            }
        elif self._model.model_type == 'restorer':
            preprocessed_samples = {
                'image': preprocessed_samples[0],
                'tar_shape': preprocessed_samples[1]
            }
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)
        return preprocessed_samples
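
    # Convert raw network outputs back into per-sample prediction dicts,
    # reusing the task-specific postprocessing of the loaded model.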
    def postprocess(self,
                    net_outputs,
                    topk=1,
                    ori_shape=None,
                    tar_shape=None,
                    transforms=None):
        if self._model.model_type == 'classifier':
            true_topk = min(self._model.num_classes, topk)
            if self._model.postprocess is None:
                self._model.build_postprocess_from_labels(topk)
            # XXX: Convert ndarray to tensor as self._model.postprocess requires
            assert len(net_outputs) == 1
            net_outputs = paddle.to_tensor(net_outputs[0])
            outputs = self._model.postprocess(net_outputs)
            class_ids = map(itemgetter('class_ids'), outputs)
            scores = map(itemgetter('scores'), outputs)
            label_names = map(itemgetter('label_names'), outputs)
            preds = [{
                'class_ids_map': l,
                'scores_map': s,
                'label_names_map': n,
            } for l, s, n in zip(class_ids, scores, label_names)]
        elif self._model.model_type in ('segmenter', 'change_detector'):
            label_map, score_map = self._model.postprocess(
                net_outputs,
                batch_origin_shape=ori_shape,
                transforms=transforms.transforms)
            preds = [{
                'label_map': l,
                'score_map': s
            } for l, s in zip(label_map, score_map)]
        elif self._model.model_type == 'detector':
            net_outputs = {
                k: v
                for k, v in zip(['bbox', 'bbox_num', 'mask'], net_outputs)
            }
            preds = self._model.postprocess(net_outputs)
        elif self._model.model_type == 'restorer':
            res_maps = self._model.postprocess(
                net_outputs[0],
                batch_tar_shape=tar_shape,
                transforms=transforms.transforms)
            preds = [{'res_map': res_map} for res_map in res_maps]
        else:
            logging.error(
                "Invalid model type {}.".format(self._model.model_type),
                exit=True)
        return preds
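
    # One forward pass: preprocess, copy inputs to the device, run the
    # predictor, fetch outputs, postprocess. Autolog timestamps bracket each
    # stage when benchmarking is enabled.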
    def _run(self, images, topk=1, transforms=None, time_it=False):
        if self.benchmark and time_it:
            self.autolog.times.start()

        preprocessed_input = self.preprocess(images, transforms)

        input_names = self.predictor.get_input_names()
        for name in input_names:
            input_tensor = self.predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(preprocessed_input[name])

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        self.predictor.run()

        output_names = self.predictor.get_output_names()
        net_outputs = []
        for name in output_names:
            output_tensor = self.predictor.get_output_handle(name)
            net_outputs.append(output_tensor.copy_to_cpu())

        if self.benchmark and time_it:
            self.autolog.times.stamp()

        res = self.postprocess(
            net_outputs,
            topk,
            ori_shape=preprocessed_input.get('ori_shape', None),
            tar_shape=preprocessed_input.get('tar_shape', None),
            transforms=transforms)

        if self.benchmark and time_it:
            self.autolog.times.end(stamp=True)
        return res
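
    # Run `warmup_iters` untimed batches first (the for-else-break idiom
    # exits both loops once enough warmup batches have been consumed), then
    # time a full pass over the file list.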
    def predict(self, data_dir, file_list, topk=1, warmup_iters=5):
        transforms = self._model.test_transforms

        # Warm up
        iters = 0
        while True:
            for images in self._parse_lines(data_dir, file_list):
                if iters >= warmup_iters:
                    break
                self._run(
                    images=images,
                    topk=topk,
                    transforms=transforms,
                    time_it=False)
                iters += 1
            else:
                continue
            break

        results = []
        for images in self._parse_lines(data_dir, file_list):
            res = self._run(
                images=images, topk=topk, transforms=transforms, time_it=True)
            results.append(res)
        return results
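
    # Yield batches of image paths (path pairs for change detection) read
    # from a whitespace-separated file list; the final short batch is kept.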
    def _parse_lines(self, data_dir, file_list):
        with open(file_list, 'r') as f:
            batch = []
            for line in f:
                items = line.strip().split()
                items = [osp.join(data_dir, item) for item in items]
                if self._model.model_type == 'change_detector':
                    batch.append((items[0], items[1]))
                else:
                    batch.append(items[0])
                if len(batch) == self.batch_size:
                    yield batch
                    # Reallocate rather than clear(), so the list already
                    # handed to the consumer is not mutated on resume.
                    batch = []
            if 0 < len(batch) < self.batch_size:
                yield batch
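

# Example invocation (script name and paths are illustrative):
#   python infer.py --config configs/task.yaml --model_dir ./inference_model \
#       --device gpu --use_trt True --precision fp16 --benchmark True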
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str)
    parser.add_argument('--inherit_off', action='store_true')
    parser.add_argument('--model_dir', type=str, default='./')
    parser.add_argument(
        '--device', type=str, choices=['cpu', 'gpu'], default='cpu')
    parser.add_argument('--enable_mkldnn', type=_bool, default=False)
    parser.add_argument('--cpu_threads', type=int, default=10)
    parser.add_argument('--use_trt', type=_bool, default=False)
    parser.add_argument(
        '--precision', type=str, choices=['fp32', 'fp16'], default='fp16')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--benchmark', type=_bool, default=False)
    parser.add_argument('--model_name', type=str, default='')
    args = parser.parse_args()

    cfg = parse_configs(args.config, not args.inherit_off)
    eval_dataset = cfg['datasets']['eval']
    data_dir = eval_dataset.args['data_dir']
    file_list = eval_dataset.args['file_list']

    predictor = TIPCPredictor(
        args.model_dir,
        device=args.device,
        cpu_thread_num=args.cpu_threads,
        use_mkl=args.enable_mkldnn,
        mkl_thread_num=args.cpu_threads,
        use_trt=args.use_trt,
        trt_precision_mode=args.precision,
        benchmark=args.benchmark,
        # Wire through --model_name and --batch_size, which were parsed but
        # previously never passed to the predictor.
        model_name=args.model_name,
        batch_size=args.batch_size)

    predictor.predict(data_dir, file_list)

    if args.benchmark:
        predictor.autolog.report()