json_AnnoSta.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. '''
  15. @File Description:
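# Read the annotations of a (COCO-style) JSON file and write the statistics to a CSV, plus distribution plots of: bounding-box shape, bounding-box aspect ratio, box start position, box end position, object category, and number of objects per image.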
  17. python ./coco_tools/json_AnnoSta.py \
  18. --json_path=./annotations/instances_val2017.json \
  19. --csv_path=./anno_sta/annos.csv \
  20. --png_shape_path=./anno_sta/annos_shape.png \
  21. --png_shapeRate_path=./anno_sta/annos_shapeRate.png \
  22. --png_pos_path=./anno_sta/annos_pos.png \
  23. --png_posEnd_path=./anno_sta/annos_posEnd.png \
  24. --png_cat_path=./anno_sta/annos_cat.png \
  25. --png_objNum_path=./anno_sta/annos_objNum.png \
  26. --get_relative=True
  27. '''
  28. import os
  29. import json
  30. import argparse
  31. import numpy as np
  32. import pandas as pd
  33. import seaborn as sns
  34. import matplotlib.pyplot as plt
  35. shp_rate_bins = [
  36. 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5,
  37. 1.6, 1.7, 1.8, 1.9, 2, 2.1, 2.2, 2.4, 2.6, 3, 3.5, 4, 5
  38. ]
  39. def check_dir(check_path, show=True):
  40. if os.path.isdir(check_path):
  41. check_directory = check_path
  42. else:
  43. check_directory = os.path.dirname(check_path)
  44. if not os.path.exists(check_directory):
  45. os.makedirs(check_directory)
  46. if show:
  47. print('make dir:', check_directory)
  48. def js_anno_sta(js_path, csv_path, png_shape_path, png_shapeRate_path,
  49. png_pos_path, png_posEnd_path, png_cat_path, png_objNum_path,
  50. get_relative, image_keyname, anno_keyname):
  51. print('json read...\n')
  52. with open(js_path, 'r') as load_f:
  53. data = json.load(load_f)
  54. df_img = pd.DataFrame(data[image_keyname])
  55. sns.jointplot('height', 'width', data=df_img, kind='hex')
  56. plt.close()
  57. df_img = df_img.rename(columns={
  58. "id": "image_id",
  59. "height": "image_height",
  60. "width": "image_width"
  61. })
  62. df_anno = pd.DataFrame(data[anno_keyname])
  63. df_anno[['pox_x', 'pox_y', 'width', 'height']] = pd.DataFrame(df_anno[
  64. 'bbox'].values.tolist())
  65. df_anno['width'] = df_anno['width'].astype(int)
  66. df_anno['height'] = df_anno['height'].astype(int)
  67. df_merge = pd.merge(df_img, df_anno, on="image_id")
  68. if png_shape_path is not None:
  69. check_dir(png_shape_path)
  70. sns.jointplot('height', 'width', data=df_merge, kind='hex')
  71. plt.savefig(png_shape_path)
  72. plt.close()
  73. print('png save to', png_shape_path)
  74. if get_relative:
  75. png_shapeR_path = png_shape_path.replace('.png', '_Relative.png')
  76. df_merge['heightR'] = df_merge['height'] / df_merge['image_height']
  77. df_merge['widthR'] = df_merge['width'] / df_merge['image_width']
  78. sns.jointplot('heightR', 'widthR', data=df_merge, kind='hex')
  79. plt.savefig(png_shapeR_path)
  80. plt.close()
  81. print('png save to', png_shapeR_path)
  82. if png_shapeRate_path is not None:
  83. check_dir(png_shapeRate_path)
  84. plt.figure(figsize=(12, 8))
  85. df_merge['shape_rate'] = (df_merge['width'] /
  86. df_merge['height']).round(1)
  87. df_merge['shape_rate'].value_counts(
  88. sort=False, bins=shp_rate_bins).plot(
  89. kind='bar', title='images shape rate')
  90. plt.xticks(rotation=20)
  91. plt.savefig(png_shapeRate_path)
  92. plt.close()
  93. print('png save to', png_shapeRate_path)
  94. if png_pos_path is not None:
  95. check_dir(png_pos_path)
  96. sns.jointplot('pox_y', 'pox_x', data=df_merge, kind='hex')
  97. plt.savefig(png_pos_path)
  98. plt.close()
  99. print('png save to', png_pos_path)
  100. if get_relative:
  101. png_posR_path = png_pos_path.replace('.png', '_Relative.png')
  102. df_merge['pox_yR'] = df_merge['pox_y'] / df_merge['image_height']
  103. df_merge['pox_xR'] = df_merge['pox_x'] / df_merge['image_width']
  104. sns.jointplot('pox_yR', 'pox_xR', data=df_merge, kind='hex')
  105. plt.savefig(png_posR_path)
  106. plt.close()
  107. print('png save to', png_posR_path)
  108. if png_posEnd_path is not None:
  109. check_dir(png_posEnd_path)
  110. df_merge['pox_y_end'] = df_merge['pox_y'] + df_merge['height']
  111. df_merge['pox_x_end'] = df_merge['pox_x'] + df_merge['width']
  112. sns.jointplot('pox_y_end', 'pox_x_end', data=df_merge, kind='hex')
  113. plt.savefig(png_posEnd_path)
  114. plt.close()
  115. print('png save to', png_posEnd_path)
  116. if get_relative:
  117. png_posEndR_path = png_posEnd_path.replace('.png', '_Relative.png')
  118. df_merge['pox_y_endR'] = df_merge['pox_y_end'] / df_merge[
  119. 'image_height']
  120. df_merge['pox_x_endR'] = df_merge['pox_x_end'] / df_merge[
  121. 'image_width']
  122. sns.jointplot('pox_y_endR', 'pox_x_endR', data=df_merge, kind='hex')
  123. plt.savefig(png_posEndR_path)
  124. plt.close()
  125. print('png save to', png_posEndR_path)
  126. if png_cat_path is not None:
  127. check_dir(png_cat_path)
  128. plt.figure(figsize=(12, 8))
  129. df_merge['category_id'].value_counts().sort_index().plot(
  130. kind='bar', title='obj category')
  131. plt.savefig(png_cat_path)
  132. plt.close()
  133. print('png save to', png_cat_path)
  134. if png_objNum_path is not None:
  135. check_dir(png_objNum_path)
  136. plt.figure(figsize=(12, 8))
  137. df_merge['image_id'].value_counts().value_counts().sort_index().plot(
  138. kind='bar', title='obj number per image')
  139. # df_merge['image_id'].value_counts().value_counts(bins=np.linspace(1,31,16)).sort_index().plot(kind='bar', title='obj number per image')
  140. plt.xticks(rotation=20)
  141. plt.savefig(png_objNum_path)
  142. plt.close()
  143. print('png save to', png_objNum_path)
  144. if csv_path is not None:
  145. check_dir(csv_path)
  146. df_merge.to_csv(csv_path)
  147. print('csv save to', csv_path)
  148. def get_args():
  149. parser = argparse.ArgumentParser(
  150. description='Json Images Infomation Statistic')
  151. # parameters
  152. parser.add_argument(
  153. '--json_path',
  154. type=str,
  155. help='json path to statistic images information')
  156. parser.add_argument(
  157. '--csv_path',
  158. type=str,
  159. default=None,
  160. help='csv path to save statistic images information, default None, do not save'
  161. )
  162. parser.add_argument(
  163. '--png_shape_path',
  164. type=str,
  165. default=None,
  166. help='png path to save statistic images shape information, default None, do not save'
  167. )
  168. parser.add_argument(
  169. '--png_shapeRate_path',
  170. type=str,
  171. default=None,
  172. help='png path to save statistic images shape rate information, default None, do not save'
  173. )
  174. parser.add_argument(
  175. '--png_pos_path',
  176. type=str,
  177. default=None,
  178. help='png path to save statistic pos information, default None, do not save'
  179. )
  180. parser.add_argument(
  181. '--png_posEnd_path',
  182. type=str,
  183. default=None,
  184. help='png path to save statistic end pos information, default None, do not save'
  185. )
  186. parser.add_argument(
  187. '--png_cat_path',
  188. type=str,
  189. default=None,
  190. help='png path to save statistic category information, default None, do not save'
  191. )
  192. parser.add_argument(
  193. '--png_objNum_path',
  194. type=str,
  195. default=None,
  196. help='png path to save statistic images object number information, default None, do not save'
  197. )
  198. parser.add_argument(
  199. '--get_relative',
  200. type=bool,
  201. default=True,
  202. help='if True, get relative result')
  203. parser.add_argument(
  204. '--image_keyname',
  205. type=str,
  206. default='images',
  207. help='image key name in json, default images')
  208. parser.add_argument(
  209. '--anno_keyname',
  210. type=str,
  211. default='annotations',
  212. help='annotation key name in json, default annotations')
  213. parser.add_argument(
  214. '-Args_show',
  215. '--Args_show',
  216. type=bool,
  217. default=True,
  218. help='Args_show(default: True), if True, show args info')
  219. args = parser.parse_args()
  220. if args.Args_show:
  221. print('Args'.center(100, '-'))
  222. for k, v in vars(args).items():
  223. print('%s = %s' % (k, v))
  224. print()
  225. return args
  226. if __name__ == '__main__':
  227. args = get_args()
  228. js_anno_sta(args.json_path, args.csv_path, args.png_shape_path,
  229. args.png_shapeRate_path, args.png_pos_path,
  230. args.png_posEnd_path, args.png_cat_path, args.png_objNum_path,
  231. args.get_relative, args.image_keyname, args.anno_keyname)