prepare_svcd.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. #!/usr/bin/env python
  2. # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import os.path as osp
  16. from common import (get_default_parser, get_path_tuples, create_file_list,
  17. link_dataset)
  18. SUBSETS = ('train', 'val', 'test')
  19. SUBDIRS = ('A', 'B', 'OUT')
  20. FILE_LIST_PATTERN = "{subset}.txt"
  21. URL = ""
  22. if __name__ == '__main__':
  23. parser = get_default_parser()
  24. args = parser.parse_args()
  25. out_dir = osp.join(args.out_dataset_dir,
  26. osp.basename(osp.normpath(args.in_dataset_dir)))
  27. link_dataset(args.in_dataset_dir, args.out_dataset_dir)
  28. for subset in SUBSETS:
  29. # NOTE: Only use cropped real samples.
  30. path_tuples = get_path_tuples(
  31. *(osp.join(out_dir, 'Real', 'subset', subset, subdir)
  32. for subdir in SUBDIRS),
  33. data_dir=args.out_dataset_dir)
  34. file_list = osp.join(
  35. args.out_dataset_dir, FILE_LIST_PATTERN.format(subset=subset))
  36. create_file_list(file_list, path_tuples)
  37. print(f"Write file list to {file_list}.")