123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- #!/usr/bin/env python
- # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import argparse
- import os.path as osp
- from prepare_dataset.common import get_path_tuples, create_file_list
- def gen_file_lists(
- data_dir,
- save_dir,
- subsets=None,
- subdirs=('images', 'masks'),
- glob_pattern='*',
- file_list_pattern="{subset}.txt",
- store_abs_path=False,
- sep=' ', ):
- """
- Generate file lists.
- Args:
- data_dir (str): Root directory of the dataset.
- save_dir (str): Directory to save the generated file lists.
- subsets (tuple|list|None, optional): List or tuple of names of subsets or None.
- Images should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/`
- (when `subsets` is set to None), where `subset` is an element of `subsets`.
- Defaults to None.
- subdirs (tuple|list, optional): List or tuple of names of subdirectories. Images
- should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` (when
- `subsets` is set to None), where `subdir` is an element of `subdirs`.
- Defaults to ('images', 'masks').
- glob_pattern (str, optional): Glob pattern used to match image files. Defaults
- to '*', which matches arbitrary file.
- file_list_pattern (str, optional): Pattern to name the file lists. Defaults to
- '{subset}.txt'.
- store_abs_path (bool, optional): Whether to store the absolute path in file
- lists. Defaults to 'False', which indicates storing the relative path.
- sep (str, optional): Delimiter to use when writing lines to file lists.
- Defaults to ' '.
- """
- if subsets is None:
- subsets = ('', )
- for subset in subsets:
- path_tuples = get_path_tuples(
- *(osp.join(data_dir, subset, subdir) for subdir in subdirs),
- glob_pattern=glob_pattern,
- data_dir=data_dir)
- if store_abs_path:
- path_tuples_new = []
- for path_tuple in path_tuples:
- path_tuple_new = [
- osp.abspath(osp.join(data_dir, path_t))
- for path_t in path_tuple
- ]
- path_tuples_new.append(tuple(path_tuple_new))
- path_tuples = path_tuples_new
- if len(subset) > 0:
- file_list_name = file_list_pattern.format(subset=subset)
- else:
- file_list_name = 'list.txt'
- file_list = osp.join(save_dir, file_list_name)
- create_file_list(file_list, path_tuples, sep)
- print(f"File list {file_list} created.")
- if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '--data_dir', type=str, help="Root directory of the dataset.")
- parser.add_argument(
- '--save_dir',
- type=str,
- default='./',
- help="Directory to save the generated file lists.")
- parser.add_argument(
- '--subsets',
- nargs="*",
- default=None,
- help="List or tuple of names of subsets.", )
- parser.add_argument(
- '--subdirs',
- nargs="*",
- default=['A', 'B', 'label'],
- help="List or tuple of names of subdirectories of subsets.", )
- parser.add_argument(
- '--glob_pattern',
- type=str,
- default='*',
- help="Glob pattern used to match image files.", )
- parser.add_argument(
- '--file_list_pattern',
- type=str,
- default='{subset}.txt',
- help="Pattern to name the file lists.", )
- parser.add_argument(
- '--store_abs_path',
- action='store_true',
- help='Whether to store the absolute path in file lists.', )
- parser.add_argument(
- '--sep',
- type=str,
- default=' ',
- help="Delimiter to use when writing lines to file lists.", )
- args = parser.parse_args()
- gen_file_lists(
- data_dir=args.data_dir,
- save_dir=args.save_dir,
- subsets=args.subsets,
- subdirs=args.subdirs,
- glob_pattern=args.glob_pattern,
- file_list_pattern=args.file_list_pattern,
- store_abs_path=args.store_abs_path,
- sep=args.sep, )
|