json_image_sta.py 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import json
  15. import argparse
  16. import os.path
  17. import pandas as pd
  18. import seaborn as sns
  19. import matplotlib.pyplot as plt
  def _check_dir(check_path, show=False):
      """Make sure the directory that will hold ``check_path`` exists.

      When ``check_path`` is itself an existing directory it is the directory
      of interest; otherwise its parent (``os.path.dirname``) is. A path with
      no directory component (for example ``"out.csv"``) is a no-op.

      Args:
          check_path: File or directory path that is about to be written.
          show: When true, print the directory that had to be created.
      """
      if os.path.isdir(check_path):
          check_directory = check_path
      else:
          check_directory = os.path.dirname(check_path)

      if check_directory and not os.path.exists(check_directory):
          # exist_ok=True: another process may create the directory between
          # the existence check above and this call; that must not be fatal.
          os.makedirs(check_directory, exist_ok=True)
          if show:
              print("make dir:", check_directory)
  def json_image_sta(json_path, csv_path, img_shape_path, img_shape_rate_path,
                     img_keyname):
      """Collect image-size statistics from a JSON file.

      The value stored under ``img_keyname`` in the JSON document is loaded
      into a pandas DataFrame. Each output whose path is not ``None`` is then
      produced, in this order: shape plot, shape-ratio plot, CSV table.

      Args:
          json_path: Path of the JSON file to read.
          csv_path: Where to write the DataFrame as CSV, or ``None`` to skip.
          img_shape_path: Where to save the hexbin joint plot of ``width``
              versus ``height``, or ``None`` to skip.
          img_shape_rate_path: Where to save the bar chart of the rounded
              ``width / height`` ratio counts, or ``None`` to skip.
          img_keyname: Key in the loaded JSON whose value holds the image
              records. The plots read the ``width`` and ``height`` columns.

      Raises:
          KeyError: If the loaded JSON object has no ``img_keyname`` entry.
      """
      print("json read...\n")
      # JSON text is UTF-8; do not depend on the platform's locale encoding.
      with open(json_path, "r", encoding="utf-8") as load_f:
          data = json.load(load_f)

      df_image = pd.DataFrame(data[img_keyname])

      if img_shape_path is not None:
          _check_dir(img_shape_path)
          sns.jointplot(y="height", x="width", data=df_image, kind="hex")
          plt.savefig(img_shape_path)
          plt.close()
          print("png save to", img_shape_path)

      if img_shape_rate_path is not None:
          _check_dir(img_shape_rate_path)
          # The ratio column is added to the frame itself, so it is also part
          # of the CSV below, but only when this plot was requested.
          df_image["shape_rate"] = (df_image["width"] /
                                    df_image["height"]).round(1)
          df_image["shape_rate"].value_counts().sort_index().plot(
              kind="bar", title="images shape rate")
          plt.savefig(img_shape_rate_path)
          plt.close()
          print("png save to", img_shape_rate_path)

      if csv_path is not None:
          _check_dir(csv_path)
          df_image.to_csv(csv_path)
          print("csv save to", csv_path)
  if __name__ == "__main__":
      # Command-line entry point: parse the paths and forward them to
      # json_image_sta. Every output path is optional; an output whose path
      # is omitted is skipped.
      parser = argparse.ArgumentParser(
          description="Get image information statistics")
      # The input file is mandatory: without it json_image_sta would call
      # open(None) and fail with a TypeError instead of a usage message.
      parser.add_argument(
          "--json_path",
          type=str,
          required=True,
          help="Path of the JSON file whose statistics are to be collected.")
      parser.add_argument(
          "--csv_path",
          type=str,
          default=None,
          help="Path for the statistics table.")
      parser.add_argument(
          "--img_shape_path",
          type=str,
          default=None,
          help="Output image saving path. The image visualizes the two-dimensional distribution of all image shapes.")
      parser.add_argument(
          "--img_shape_rate_path",
          type=str,
          default=None,
          help="Output image saving path. The image visualizes the one-dimensional distribution of shape ratio (width/height) of all images.")
      parser.add_argument(
          "--img_keyname",
          type=str,
          default="images",
          help="Image key in the JSON file.")
      args = parser.parse_args()

      json_image_sta(args.json_path, args.csv_path, args.img_shape_path,
                     args.img_shape_rate_path, args.img_keyname)