3 vuotta sitten · 1c30b71959
--- a/tools/prepare_dataset/common.py
+++ b/tools/prepare_dataset/common.py
@@ -1,4 +1,6 @@
 
				 import argparse
			
 
				+import random
			
 
				+import copy
			
 
				 import os
			
 
				 import os.path as osp
			
 
				 from glob import glob
			
@@ -198,6 +200,20 @@ def create_file_list(file_list, path_tuples, sep=' '):
 
				             f.write(line + '\n')
			
 
				 
			
 
				 
			
 
				+def create_label_list(label_list, labels):
			
 
				+    """
			
 
				+    Create label list.
			
 
				+    
			
 
				+    Args:
			
 
				+        label_list (str): Path of label list to create.
			
 
				+        labels (list[str]|tuple[str]]): Label names.
			
 
				+    """
			
 
				+
			
 
				+    with open(label_list, 'w') as f:
			
 
				+        for label in labels:
			
 
				+            f.write(label + '\n')
			
 
				+
			
 
				+
			
 
				 def link_dataset(src, dst):
			
 
				     """
			
 
				     Make a symbolic link to a dataset.
			
@@ -211,5 +227,57 @@ def link_dataset(src, dst):
 
				         raise ValueError(f"{dst} exists and is not a directory.")
			
 
				     elif not osp.exists(dst):
			
 
				         os.makedirs(dst)
			
 
				+    src = osp.realpath(src)
			
 
				     name = osp.basename(osp.normpath(src))
			
 
				     os.symlink(src, osp.join(dst, name), target_is_directory=True)
			
 
				+
			
 
				+
			
 
				+def random_split(samples,
			
 
				+                 ratios=(0.7, 0.2, 0.1),
			
 
				+                 inplace=True,
			
 
				+                 drop_remainder=False):
			
 
				+    """
			
 
				+    Randomly split the dataset into two or three subsets.
			
 
				+    
			
 
				+    Args:
			
 
				+        samples (list): All samples of the dataset.
			
 
				+        ratios (tuple[float], optional): If the length of `ratios` is 2,
			
 
				+            the two elements indicate the ratios of samples used for training
			
 
				+            and evaluation. If the length of `ratios` is 3, the three elements
			
 
				+            indicate the ratios of samples used for training, validation, and 
			
 
				+            testing. Defaults to (0.7, 0.2, 0.1).
			
 
				+        inplace (bool, optional): Whether to shuffle `samples` in place. 
			
 
				+            Defaults to True.
			
 
				+        drop_remainder (bool, optional): Whether to discard the remaining samples.
			
 
				+            If False, the remaining samples will be included in the last subset.
			
 
				+            For example, if `ratios` is (0.7, 0.1) and `drop_remainder` is False, 
			
 
				+            the two subsets after splitting will contain 70% and 30% of the samples, 
			
 
				+            respectively. Defaults to False.
			
 
				+    """
			
 
				+
			
 
				+    if not inplace:
			
 
				+        samples = copy.deepcopy(samples)
			
 
				+
			
 
				+    if len(samples) == 0:
			
 
				+        raise ValueError("There are no samples!")
			
 
				+
			
 
				+    if len(ratios) not in (2, 3):
			
 
				+        raise ValueError("`len(ratios)` must be 2 or 3!")
			
 
				+
			
 
				+    random.shuffle(samples)
			
 
				+
			
 
				+    n_samples = len(samples)
			
 
				+    acc_r = 0
			
 
				+    st_idx, ed_idx = 0, 0
			
 
				+    splits = []
			
 
				+    for r in ratios:
			
 
				+        acc_r += r
			
 
				+        ed_idx = round(acc_r * n_samples)
			
 
				+        splits.append(samples[st_idx:ed_idx])
			
 
				+        st_idx = ed_idx
			
 
				+
			
 
				+    if ed_idx < len(ratios) and not drop_remainder:
			
 
				+        # Append remainder to the last split
			
 
				+        splits[-1].append(splits[ed_idx:])
			
 
				+
			
 
				+    return splits