test_train_inference_python.sh 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. #!/usr/bin/env bash
  2. source test_tipc/common_func.sh
  3. FILENAME=$1
  4. # $MODE be one of {'lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer'}
  5. MODE=$2
  6. dataline=$(awk 'NR>=1{print}' $FILENAME)
  7. # Parse params
  8. IFS=$'\n'
  9. lines=(${dataline})
  10. # Training params
  11. task_name=$(parse_first_value "${lines[1]}")
  12. model_name=$(parse_second_value "${lines[1]}")
  13. python=$(func_parser_value "${lines[2]}")
  14. gpu_list=$(func_parser_value "${lines[3]}")
  15. train_use_gpu_key=$(func_parser_key "${lines[4]}")
  16. train_use_gpu_value=$(func_parser_value "${lines[4]}")
  17. autocast_list=$(func_parser_value "${lines[5]}")
  18. autocast_key=$(func_parser_key "${lines[5]}")
  19. epoch_key=$(func_parser_key "${lines[6]}")
  20. epoch_num=$(func_parser_params "${lines[6]}")
  21. save_model_key=$(func_parser_key "${lines[7]}")
  22. train_batch_key=$(func_parser_key "${lines[8]}")
  23. train_batch_value=$(func_parser_params "${lines[8]}")
  24. pretrain_model_key=$(func_parser_key "${lines[9]}")
  25. pretrain_model_value=$(func_parser_value "${lines[9]}")
  26. train_model_name=$(func_parser_value "${lines[10]}")
  27. train_infer_img_dir=$(parse_first_value "${lines[11]}")
  28. train_infer_img_file_list=$(parse_second_value "${lines[11]}")
  29. train_param_key1=$(func_parser_key "${lines[12]}")
  30. train_param_value1=$(func_parser_value "${lines[12]}")
  31. trainer_list=$(func_parser_value "${lines[14]}")
  32. trainer_norm=$(func_parser_key "${lines[15]}")
  33. norm_trainer=$(func_parser_value "${lines[15]}")
  34. pact_key=$(func_parser_key "${lines[16]}")
  35. pact_trainer=$(func_parser_value "${lines[16]}")
  36. fpgm_key=$(func_parser_key "${lines[17]}")
  37. fpgm_trainer=$(func_parser_value "${lines[17]}")
  38. distill_key=$(func_parser_key "${lines[18]}")
  39. distill_trainer=$(func_parser_value "${lines[18]}")
  40. trainer_key1=$(func_parser_key "${lines[19]}")
  41. trainer_value1=$(func_parser_value "${lines[19]}")
  42. trainer_key2=$(func_parser_key "${lines[20]}")
  43. trainer_value2=$(func_parser_value "${lines[20]}")
  44. eval_py=$(func_parser_value "${lines[23]}")
  45. eval_key1=$(func_parser_key "${lines[24]}")
  46. eval_value1=$(func_parser_value "${lines[24]}")
  47. save_infer_key=$(func_parser_key "${lines[27]}")
  48. export_weight=$(func_parser_key "${lines[28]}")
  49. export_shape_key=$(func_parser_key "${lines[29]}")
  50. export_shape_value=$(func_parser_value "${lines[29]}")
  51. norm_export=$(func_parser_value "${lines[30]}")
  52. pact_export=$(func_parser_value "${lines[31]}")
  53. fpgm_export=$(func_parser_value "${lines[32]}")
  54. distill_export=$(func_parser_value "${lines[33]}")
  55. export_key1=$(func_parser_key "${lines[34]}")
  56. export_value1=$(func_parser_value "${lines[34]}")
  57. export_key2=$(func_parser_key "${lines[35]}")
  58. export_value2=$(func_parser_value "${lines[35]}")
  59. inference_dir=$(func_parser_value "${lines[36]}")
  60. # Params of inference model
  61. infer_model_dir_list=$(func_parser_value "${lines[37]}")
  62. infer_export_list=$(func_parser_value "${lines[38]}")
  63. infer_is_quant=$(func_parser_value "${lines[39]}")
  64. # Inference params
  65. inference_py=$(func_parser_value "${lines[40]}")
  66. use_gpu_key=$(func_parser_key "${lines[41]}")
  67. use_gpu_list=$(func_parser_value "${lines[41]}")
  68. use_mkldnn_key=$(func_parser_key "${lines[42]}")
  69. use_mkldnn_list=$(func_parser_value "${lines[42]}")
  70. cpu_threads_key=$(func_parser_key "${lines[43]}")
  71. cpu_threads_list=$(func_parser_value "${lines[43]}")
  72. batch_size_key=$(func_parser_key "${lines[44]}")
  73. batch_size_list=$(func_parser_value "${lines[44]}")
  74. use_trt_key=$(func_parser_key "${lines[45]}")
  75. use_trt_list=$(func_parser_value "${lines[45]}")
  76. precision_key=$(func_parser_key "${lines[46]}")
  77. precision_list=$(func_parser_value "${lines[46]}")
  78. infer_model_key=$(func_parser_key "${lines[47]}")
  79. file_list_key=$(func_parser_key "${lines[48]}")
  80. infer_img_dir=$(parse_first_value "${lines[48]}")
  81. infer_img_file_list=$(parse_second_value "${lines[48]}")
  82. save_log_key=$(func_parser_key "${lines[49]}")
  83. benchmark_key=$(func_parser_key "${lines[50]}")
  84. benchmark_value=$(func_parser_value "${lines[50]}")
  85. infer_key1=$(func_parser_key "${lines[51]}")
  86. infer_value1=$(func_parser_value "${lines[51]}")
  87. infer_key2=$(func_parser_key "${lines[52]}")
  88. infer_value2=$(func_parser_value "${lines[52]}")
  89. OUT_PATH="./test_tipc/output/${task_name}/${model_name}/${MODE}"
  90. mkdir -p ${OUT_PATH}
  91. status_log="${OUT_PATH}/results_python.log"
  92. echo "------------------------ ${MODE} ------------------------" >> "${status_log}"
  93. # Parse extra args
  94. parse_extra_args "${lines[@]}"
  95. for params in ${extra_args[*]}; do
  96. IFS=':'
  97. arr=(${params})
  98. key=${arr[0]}
  99. value=${arr[1]}
  100. :
  101. done
  102. function func_inference() {
  103. local IFS='|'
  104. local _python=$1
  105. local _script="$2"
  106. local _model_dir="$3"
  107. local _log_path="$4"
  108. local _img_dir="$5"
  109. local _file_list="$6"
  110. # Do inference
  111. for use_gpu in ${use_gpu_list[*]}; do
  112. if [ ${use_gpu} = 'False' ] || [ ${use_gpu} = 'cpu' ]; then
  113. for use_mkldnn in ${use_mkldnn_list[*]}; do
  114. if [ ${use_mkldnn} = 'False' ]; then
  115. continue
  116. fi
  117. for threads in ${cpu_threads_list[*]}; do
  118. for batch_size in ${batch_size_list[*]}; do
  119. for precision in ${precision_list[*]}; do
  120. if [ ${use_mkldnn} = 'False' ] && [ ${precision} = 'fp16' ]; then
  121. continue
  122. fi # Skip when enable fp16 but disable mkldnn
  123. set_precision=$(func_set_params "${precision_key}" "${precision}")
  124. _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
  125. infer_value1="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}_results"
  126. set_device=$(func_set_params "${use_gpu_key}" "${use_gpu}")
  127. set_mkldnn=$(func_set_params "${use_mkldnn_key}" "${use_mkldnn}")
  128. set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
  129. set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
  130. set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
  131. set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
  132. set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
  133. set_infer_params2=$(func_set_params "${infer_key2}" "${infer_value2}")
  134. cmd="${_python} ${_script} ${file_list_key} ${_img_dir} ${_file_list} ${set_device} ${set_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_benchmark} ${set_precision} ${set_infer_params1} ${set_infer_params2}"
  135. echo ${cmd}
  136. run_command "${cmd}" "${_save_log_path}"
  137. last_status=${PIPESTATUS[0]}
  138. status_check ${last_status} "${cmd}" "${status_log}" "${model_name}"
  139. done
  140. done
  141. done
  142. done
  143. elif [ ${use_gpu} = 'True' ] || [ ${use_gpu} = 'gpu' ]; then
  144. for use_trt in ${use_trt_list[*]}; do
  145. for precision in ${precision_list[*]}; do
  146. if [ ${precision} = 'fp16' ] && [ ${use_trt} = 'False' ]; then
  147. continue
  148. fi # Skip when enable fp16 but disable trt
  149. for batch_size in ${batch_size_list[*]}; do
  150. _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
  151. infer_value1="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}_results"
  152. set_device=$(func_set_params "${use_gpu_key}" "${use_gpu}")
  153. set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
  154. set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
  155. set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
  156. set_precision=$(func_set_params "${precision_key}" "${precision}")
  157. set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
  158. set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
  159. set_infer_params2=$(func_set_params "${infer_key2}" "${infer_value2}")
  160. cmd="${_python} ${_script} ${file_list_key} ${_img_dir} ${_file_list} ${set_device} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_benchmark} ${set_infer_params2}"
  161. echo ${cmd}
  162. run_command "${cmd}" "${_save_log_path}"
  163. last_status=${PIPESTATUS[0]}
  164. status_check $last_status "${cmd}" "${status_log}" "${model_name}"
  165. done
  166. done
  167. done
  168. else
  169. echo "Currently, hardwares other than CPU and GPU are not supported!"
  170. fi
  171. done
  172. }
  173. if [ ${MODE} = 'whole_infer' ]; then
  174. GPUID=$3
  175. if [ ${#GPUID} -le 0 ]; then
  176. env=""
  177. else
  178. env="export CUDA_VISIBLE_DEVICES=${GPUID}"
  179. fi
  180. if [ ${infer_model_dir_list} == 'null' ]; then
  181. echo -e "\033[33m No inference model is specified! \033[0m"
  182. exit 1
  183. fi
  184. # Set CUDA_VISIBLE_DEVICES
  185. eval ${env}
  186. export count=0
  187. IFS='|'
  188. infer_run_exports=(${infer_export_list})
  189. for infer_model in ${infer_model_dir_list[*]}; do
  190. # Run export
  191. if [ ${infer_run_exports[count]} != 'null' ]; then
  192. save_infer_dir="${infer_model}/static"
  193. set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
  194. set_export_shape=$(func_set_params "${export_shape_key}" "${export_shape_value}")
  195. set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
  196. export_cmd="${python} ${infer_run_exports[count]} ${set_export_weight} ${set_save_infer_key} ${set_export_shape}"
  197. echo ${infer_run_exports[count]}
  198. eval ${export_cmd}
  199. status_export=$?
  200. status_check ${status_export} "${export_cmd}" "${status_log}" "${model_name}"
  201. else
  202. save_infer_dir=${infer_model}
  203. fi
  204. # Run inference
  205. func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${OUT_PATH}" "${infer_img_dir}" "${infer_img_file_list}"
  206. count=$((${count} + 1))
  207. done
  208. else
  209. IFS='|'
  210. export count=0
  211. USE_GPU_KEY=(${train_use_gpu_value})
  212. for gpu in ${gpu_list[*]}; do
  213. train_use_gpu=${USE_GPU_KEY[count]}
  214. count=$((${count} + 1))
  215. ips=""
  216. if [ ${gpu} = '-1' ]; then
  217. env=""
  218. elif [ ${#gpu} -le 1 ]; then
  219. env="export CUDA_VISIBLE_DEVICES=${gpu}"
  220. eval ${env}
  221. elif [ ${#gpu} -le 15 ]; then
  222. IFS=','
  223. array=(${gpu})
  224. env="export CUDA_VISIBLE_DEVICES=${array[0]}"
  225. IFS='|'
  226. else
  227. IFS=';'
  228. array=(${gpu})
  229. ips=${array[0]}
  230. gpu=${array[1]}
  231. IFS='|'
  232. env=""
  233. fi
  234. for autocast in ${autocast_list[*]}; do
  235. if [ ${autocast} = 'amp' ]; then
  236. set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
  237. else
  238. set_amp_config=""
  239. fi
  240. for trainer in ${trainer_list[*]}; do
  241. if [ ${trainer} = ${pact_key} ]; then
  242. run_train=${pact_trainer}
  243. run_export=${pact_export}
  244. elif [ ${trainer} = "${fpgm_key}" ]; then
  245. run_train=${fpgm_trainer}
  246. run_export=${fpgm_export}
  247. elif [ ${trainer} = "${distill_key}" ]; then
  248. run_train=${distill_trainer}
  249. run_export=${distill_export}
  250. elif [ ${trainer} = ${trainer_key1} ]; then
  251. run_train=${trainer_value1}
  252. run_export=${export_value1}
  253. elif [[ ${trainer} = ${trainer_key2} ]]; then
  254. run_train=${trainer_value2}
  255. run_export=${export_value2}
  256. else
  257. run_train=${norm_trainer}
  258. run_export=${norm_export}
  259. fi
  260. if [ ${run_train} = 'null' ]; then
  261. continue
  262. fi
  263. set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
  264. set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
  265. set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
  266. set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
  267. set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
  268. set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
  269. # If length of ips >= 15, then it is seen as multi-machine.
  270. # 15 is the min length of ips info for multi-machine: 0.0.0.0,0.0.0.0
  271. if [ ${#ips} -le 15 ]; then
  272. save_dir="${OUT_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
  273. nodes=1
  274. else
  275. IFS=','
  276. ips_array=(${ips})
  277. IFS='|'
  278. nodes=${#ips_array[@]}
  279. save_dir="${OUT_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
  280. fi
  281. log_path="${OUT_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log"
  282. # Load pretrained model from norm training if current trainer is pact or fpgm trainer.
  283. if ([ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]) && [ ${nodes} -le 1 ]; then
  284. set_pretrain="${load_norm_train_model}"
  285. fi
  286. set_save_model=$(func_set_params "${save_model_key}" "${save_dir}")
  287. if [ ${#gpu} -le 2 ]; then # Train with cpu or single gpu
  288. cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
  289. elif [ ${#ips} -le 15 ]; then # Train with multi-gpu
  290. cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
  291. else # Train with multi-machine
  292. cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
  293. fi
  294. echo ${cmd}
  295. # Run train
  296. run_command "${cmd}" "${log_path}"
  297. status_check $? "${cmd}" "${status_log}" "${model_name}"
  298. if [[ "${cmd}" == *'paddle.distributed.launch'* ]]; then
  299. cat log/workerlog.0 >> ${log_path}
  300. fi
  301. set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_dir}/${train_model_name}/model.pdparams")
  302. # Save norm trained models to set pretrain for pact training and fpgm training
  303. if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
  304. load_norm_train_model=${set_eval_pretrain}
  305. fi
  306. # Run evaluation
  307. if [ ${eval_py} != 'null' ]; then
  308. log_path="${OUT_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_eval.log"
  309. set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
  310. eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
  311. run_command "${eval_cmd}" "${log_path}"
  312. status_check $? "${eval_cmd}" "${status_log}" "${model_name}"
  313. fi
  314. # Run export model
  315. if [ ${run_export} != 'null' ]; then
  316. log_path="${OUT_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_export.log"
  317. save_infer_path="${save_dir}/static"
  318. set_export_weight=$(func_set_params "${export_weight}" "${save_dir}/${train_model_name}")
  319. set_export_shape=$(func_set_params "${export_shape_key}" "${export_shape_value}")
  320. set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
  321. export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} ${set_export_shape}"
  322. run_command "${export_cmd}" "${log_path}"
  323. status_check $? "${export_cmd}" "${status_log}" "${model_name}"
  324. # Run inference
  325. eval ${env}
  326. if [[ ${inference_dir} != 'null' ]] && [[ ${inference_dir} != '##' ]]; then
  327. infer_model_dir="${save_infer_path}/${inference_dir}"
  328. else
  329. infer_model_dir=${save_infer_path}
  330. fi
  331. func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${OUT_PATH}" "${train_infer_img_dir}" "${train_infer_img_file_list}"
  332. eval "unset CUDA_VISIBLE_DEVICES"
  333. fi
  334. done # Done with: for trainer in ${trainer_list[*]}; do
  335. done # Done with: for autocast in ${autocast_list[*]}; do
  336. done # Done with: for gpu in ${gpu_list[*]}; do
  337. fi # End if [ ${MODE} = 'infer' ]; then