generate_file_lists.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. #!/usr/bin/env python
  2. # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import argparse
  16. import os.path as osp
  17. from prepare_dataset.common import get_path_tuples, create_file_list
  18. def gen_file_lists(
  19. data_dir,
  20. save_dir,
  21. subsets=None,
  22. subdirs=('images', 'masks'),
  23. glob_pattern='*',
  24. file_list_pattern="{subset}.txt",
  25. store_abs_path=False,
  26. sep=' ', ):
  27. """
  28. Generate file lists.
  29. Args:
  30. data_dir (str): Root directory of the dataset.
  31. save_dir (str): Directory to save the generated file lists.
  32. subsets (tuple|list|None, optional): List or tuple of names of subsets or None.
  33. Images should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/`
  34. (when `subsets` is set to None), where `subset` is an element of `subsets`.
  35. Defaults to None.
  36. subdirs (tuple|list, optional): List or tuple of names of subdirectories. Images
  37. should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` (when
  38. `subsets` is set to None), where `subdir` is an element of `subdirs`.
  39. Defaults to ('images', 'masks').
  40. glob_pattern (str, optional): Glob pattern used to match image files. Defaults
  41. to '*', which matches arbitrary file.
  42. file_list_pattern (str, optional): Pattern to name the file lists. Defaults to
  43. '{subset}.txt'.
  44. store_abs_path (bool, optional): Whether to store the absolute path in file
  45. lists. Defaults to 'False', which indicates storing the relative path.
  46. sep (str, optional): Delimiter to use when writing lines to file lists.
  47. Defaults to ' '.
  48. """
  49. if subsets is None:
  50. subsets = ('', )
  51. for subset in subsets:
  52. path_tuples = get_path_tuples(
  53. *(osp.join(data_dir, subset, subdir) for subdir in subdirs),
  54. glob_pattern=glob_pattern,
  55. data_dir=data_dir)
  56. if store_abs_path:
  57. path_tuples_new = []
  58. for path_tuple in path_tuples:
  59. path_tuple_new = [
  60. osp.abspath(osp.join(data_dir, path_t))
  61. for path_t in path_tuple
  62. ]
  63. path_tuples_new.append(tuple(path_tuple_new))
  64. path_tuples = path_tuples_new
  65. if len(subset) > 0:
  66. file_list_name = file_list_pattern.format(subset=subset)
  67. else:
  68. file_list_name = 'list.txt'
  69. file_list = osp.join(save_dir, file_list_name)
  70. create_file_list(file_list, path_tuples, sep)
  71. print(f"File list {file_list} created.")
  72. if __name__ == '__main__':
  73. parser = argparse.ArgumentParser()
  74. parser.add_argument(
  75. '--data_dir', type=str, help="Root directory of the dataset.")
  76. parser.add_argument(
  77. '--save_dir',
  78. type=str,
  79. default='./',
  80. help="Directory to save the generated file lists.")
  81. parser.add_argument(
  82. '--subsets',
  83. nargs="*",
  84. default=None,
  85. help="List or tuple of names of subsets.", )
  86. parser.add_argument(
  87. '--subdirs',
  88. nargs="*",
  89. default=['A', 'B', 'label'],
  90. help="List or tuple of names of subdirectories of subsets.", )
  91. parser.add_argument(
  92. '--glob_pattern',
  93. type=str,
  94. default='*',
  95. help="Glob pattern used to match image files.", )
  96. parser.add_argument(
  97. '--file_list_pattern',
  98. type=str,
  99. default='{subset}.txt',
  100. help="Pattern to name the file lists.", )
  101. parser.add_argument(
  102. '--store_abs_path',
  103. action='store_true',
  104. help='Whether to store the absolute path in file lists.', )
  105. parser.add_argument(
  106. '--sep',
  107. type=str,
  108. default=' ',
  109. help="Delimiter to use when writing lines to file lists.", )
  110. args = parser.parse_args()
  111. gen_file_lists(
  112. data_dir=args.data_dir,
  113. save_dir=args.save_dir,
  114. subsets=args.subsets,
  115. subdirs=args.subdirs,
  116. glob_pattern=args.glob_pattern,
  117. file_list_pattern=args.file_list_pattern,
  118. store_abs_path=args.store_abs_path,
  119. sep=args.sep, )