3 tahun lalu · 2f6c71ddd2
--- a/docs/data/tools_cn.md
+++ b/docs/data/tools_cn.md
@@ -4,14 +4,15 @@
 
				 
			
 
				 PaddleRS在`tools`目录中提供了丰富的遥感影像处理工具，包括：
			
 
				 
			
 
				-- `coco2mask.py`：用于将COCO格式的标注文件转换为.png格式。
			
 
				-- `mask2shape.py`：用于将模型推理输出的.png格式栅格标签转换为.shp矢量格式。
			
 
				+- `coco2mask.py`：用于将COCO格式的标注文件转换为PNG格式。
			
 
				+- `mask2shape.py`：用于将模型推理输出的PNG格式栅格标签转换为.shp矢量格式。
			
 
				 - `geojson2mask.py`：用于将GeoJSON格式标签转换为.tif栅格格式。
			
 
				 - `match.py`：用于实现两幅影像的配准。
			
 
				 - `split.py`：用于对大幅面影像数据进行切片。
			
 
				 - `coco_tools/`：COCO工具合集，用于统计处理COCO格式标注文件。
			
 
				 - `prepare_dataset/`：数据集预处理脚本合集。
			
 
				 - `extract_ms_patches.py`：从整幅遥感影像中提取多尺度影像块。
			
 
				+- `generate_file_lists.py`：对数据集生成file list。
			
 
				 
			
 
				 ## 使用说明
			
 
				 
			
@@ -23,7 +24,7 @@ cd tools
 
				 
			
 
				 ### coco2mask
			
 
				 
			
 
				-`coco2mask.py`的主要功能是将影像以及对应的COCO格式的分割标签转换为影像与.png格式的标签，结果会分别存放在`img`和`gt`两个目录中。相关的数据样例可以参考[中国典型城市建筑物实例数据集](https://www.scidb.cn/detail?dataSetId=806674532768153600&dataSetType=journal)。对于mask，保存结果为单通道的伪彩色影像。使用方式如下：
			
 
				+`coco2mask.py`的主要功能是将影像以及对应的COCO格式的分割标签转换为影像与PNG格式的标签，结果会分别存放在`img`和`gt`两个目录中。相关的数据样例可以参考[中国典型城市建筑物实例数据集](https://www.scidb.cn/detail?dataSetId=806674532768153600&dataSetType=journal)。对于mask，保存结果为单通道的伪彩色影像。使用方式如下：
			
 
				 
			
 
				 ```shell
			
 
				 python coco2mask.py --raw_dir {输入目录路径} --save_dir {输出目录路径}
			
@@ -31,12 +32,12 @@ python coco2mask.py --raw_dir {输入目录路径} --save_dir {输出目录路
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `raw_dir`：存放原始数据的目录，其中影像存放在`images`子目录中，标签以`xxx.json`格式保存。
			
 
				-- `save_dir`：保存输出结果的目录，其中影像保存在`img`子目录中，.png格式的标签保存在`gt`子目录中。
			
 
				+- `--raw_dir`：存放原始数据的目录，其中影像存放在`images`子目录中，标签以`xxx.json`格式保存。
			
 
				+- `--save_dir`：保存输出结果的目录，其中影像保存在`img`子目录中，PNG格式的标签保存在`gt`子目录中。
			
 
				 
			
 
				 ### mask2shape
			
 
				 
			
 
				-`mask2shape.py`的主要功能是将.png格式的分割结果转换为shapefile格式（矢量图）。使用方式如下：
			
 
				+`mask2shape.py`的主要功能是将PNG格式的分割结果转换为shapefile格式（矢量图）。使用方式如下：
			
 
				 
			
 
				 ```shell
			
 
				 python mask2shape.py --src_img_path {带有地理信息的原始影像路径} --mask_path {输入分割标签路径} [--save_path {输出矢量图路径}] [--ignore_index {需要忽略的索引值}]
			
@@ -44,10 +45,10 @@ python mask2shape.py --src_img_path {带有地理信息的原始影像路径} --
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `src_img_path`：原始影像路径，需要带有地理元信息，以便为生成的shapefile提供地理投影坐标系等信息。
			
 
				-- `mask_path`：模型推理得到的.png格式的分割结果。
			
 
				-- `save_path`：保存shapefile的路径，默认为`output`。
			
 
				-- `ignore_index`：需要在shapefile中忽略的索引值（例如分割任务中的背景类），默认为`255`。
			
 
				+- `--src_img_path`：原始影像路径，需要带有地理元信息，以便为生成的shapefile提供地理投影坐标系等信息。
			
 
				+- `--mask_path`：模型推理得到的PNG格式的分割结果。
			
 
				+- `--save_path`：保存shapefile的路径，默认为`output`。
			
 
				+- `--ignore_index`：需要在shapefile中忽略的索引值（例如分割任务中的背景类），默认为`255`。
			
 
				 
			
 
				 ### geojson2mask
			
 
				 
			
@@ -59,9 +60,9 @@ python geojson2mask.py --src_img_path {带有地理信息的原始影像路径}
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `src_img_path`：原始影像路径，需要带有地理元信息。
			
 
				-- `geojson_path`：GeoJSON格式标签路径。
			
 
				-- `save_path`：保存转换后的栅格文件的路径。
			
 
				+- `--src_img_path`：原始影像路径，需要带有地理元信息。
			
 
				+- `--geojson_path`：GeoJSON格式标签路径。
			
 
				+- `--save_path`：保存转换后的栅格文件的路径。
			
 
				 
			
 
				 ### match
			
 
				 
			
@@ -73,26 +74,26 @@ python match.py --image1_path {时相1影像路径} --image2_path {时相2影像
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `image1_path`：时相1影像路径。该影像必须包含地理信息，且配准过程中以该影像为基准影像。
			
 
				-- `image2_path`：时相2影像路径。该影像的地理信息将不被用到。配准过程中将该影像配准到时相1影像。
			
 
				-- `image1_bands`：时相1影像用于配准的波段，指定为三通道（分别代表R、G、B）或单通道，默认为`[1, 2, 3]`。
			
 
				-- `image2_bands`：时相2影像用于配准的波段，指定为三通道（分别代表R、G、B）或单通道，默认为`[1, 2, 3]`。
			
 
				-- `save_path`： 配准后时相2影像输出路径。
			
 
				+- `--image1_path`：时相1影像路径。该影像必须包含地理信息，且配准过程中以该影像为基准影像。
			
 
				+- `--image2_path`：时相2影像路径。该影像的地理信息将不被用到。配准过程中将该影像配准到时相1影像。
			
 
				+- `--image1_bands`：时相1影像用于配准的波段，指定为三通道（分别代表R、G、B）或单通道，默认为`[1, 2, 3]`。
			
 
				+- `--image2_bands`：时相2影像用于配准的波段，指定为三通道（分别代表R、G、B）或单通道，默认为`[1, 2, 3]`。
			
 
				+- `--save_path`：配准后时相2影像输出路径。
			
 
				 
			
 
				 ### split
			
 
				 
			
 
				 `split.py`的主要功能是将大幅面遥感影像划分为影像块，这些影像块可以作为训练时的输入。使用方式如下：
			
 
				 
			
 
				 ```shell
			
 
				-python split.py --image_path {输入影像路径} [--mask_path {真值标签路径}] [--block_size {影像块尺寸}] [--save_dir {输出目录}]
			
 
				+python split.py --image_path {输入影像路径} [--mask_path {真值标签路径}] [--block_size {影像块尺寸}] [--save_dir {输出目录路径}]
			
 
				 ```
			
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `image_path`：需要切分的影像的路径。
			
 
				-- `mask_path`：一同切分的标签影像路径，默认为`None`。
			
 
				-- `block_size`：切分影像块大小，默认为`512`。
			
 
				-- `save_dir`：保存切分后结果的文件夹路径，默认为`output`。
			
 
				+- `--image_path`：需要切分的影像的路径。
			
 
				+- `--mask_path`：一同切分的标签影像路径，默认为`None`。
			
 
				+- `--block_size`：切分影像块大小，默认为`512`。
			
 
				+- `--save_dir`：保存切分后结果的文件夹路径，默认为`output`。
			
 
				 
			
 
				 ### coco_tools
			
 
				 
			
@@ -138,17 +139,36 @@ python extract_ms_patches.py --image_paths {一个或多个输入影像路径} -
 
				 
			
 
				 其中：
			
 
				 
			
 
				-- `image_paths`：源影像路径，可以指定多个路径。
			
 
				-- `mask_path`：真值标签路径。
			
 
				-- `save_dir`：保存切分后结果的文件夹路径，默认为`output`。
			
 
				-- `min_patch_size`：提取的影像块的最小尺寸（以影像块长/宽的像素个数计），即四叉树的叶子结点在图中覆盖的最小范围，默认为`256`。
			
 
				-- `bg_class`：背景类别的类别编号，默认为`0`。
			
 
				-- `target_class`：目标类别的类别编号，若为`None`，则表示所有背景类别以外的类别均为目标类别，默认为`None`。
			
 
				-- `max_level`：检索的最大尺度层级，若为`None`，则表示不限制层级，默认为`None`。
			
 
				-- `include_bg`：若指定此选项，则也保存那些仅包含背景类别、不包含目标类别的影像块。
			
 
				+- `--image_paths`：源影像路径，可以指定多个路径。
			
 
				+- `--mask_path`：真值标签路径。
			
 
				+- `--save_dir`：保存切分后结果的文件夹路径，默认为`output`。
			
 
				+- `--min_patch_size`：提取的影像块的最小尺寸（以影像块长/宽的像素个数计），即四叉树的叶子结点在图中覆盖的最小范围，默认为`256`。
			
 
				+- `--bg_class`：背景类别的类别编号，默认为`0`。
			
 
				+- `--target_class`：目标类别的类别编号，若为`None`，则表示所有背景类别以外的类别均为目标类别，默认为`None`。
			
 
				+- `--max_level`：检索的最大尺度层级，若为`None`，则表示不限制层级，默认为`None`。
			
 
				+- `--include_bg`：若指定此选项，则也保存那些仅包含背景类别、不包含目标类别的影像块。
			
 
				 - `--nonzero_ratio`：指定一个阈值，对于任意一幅源影像，若影像块中非零像素占比小于此阈值，则该影像块将被舍弃。若为`None`，则表示不进行过滤。默认为`None`。
			
 
				 - `--visualize`：若指定此选项，则程序执行完毕后将生成图像`./vis_quadtree.png`，其中保存有四叉树中节点情况的可视化结果，一个例子如下图所示：
			
 
				 
			
 
				 <div align="center">
			
 
				 <img src="https://user-images.githubusercontent.com/21275753/189264850-f94b3d7b-c631-47b1-9833-0800de2ccf54.png"  width = "400" />  
			
 
				 </div>
			
 
				+
			
 
				+### generate_file_lists
			
 
				+
			
 
				+`generate_file_lists.py`的主要功能是对数据集生成符合PaddleRS格式要求的file list。使用方式如下：
			
 
				+
			
 
				+```shell
			
 
				+python generate_file_lists.py --data_dir {数据集根目录路径} --save_dir {输出目录路径} [--subsets {数据集所包含子集名称}] [--subdirs {子目录名称}] [--glob_pattern {影像文件名匹配模板}] [--file_list_pattern {file list文件名模板}] [--store_abs_path] [--sep {file list中使用的分隔符}]
			
 
				+```
			
 
				+
			
 
				+其中：
			
 
				+
			
 
				+- `--data_dir`：数据集的根目录。
			
 
				+- `--save_dir`：保存生成的file list的目录。
			
 
				+- `--subsets`：数据集所包含子集名称。数据集中的影像应保存在`data_dir/subset/subdir/`或者`data_dir/subdir/`（当不指定`--subsets`时），其中`subset`是通过`--subsets`指定的子集名称之一。示例：`--subsets train val test`。
			
 
				+- `--subdirs`：子目录名称。数据集中的影像应保存在`data_dir/subset/subdir/`或者`data_dir/subdir/`（当不指定`--subsets`时），其中`subdir`是通过`--subdirs`指定的子目录名称之一。默认为`('images', 'masks')`。
			
 
				+- `--glob_pattern`：影像文件名匹配模板。默认为`*`，表示匹配所有文件。
			
 
				+- `--file_list_pattern`：file list文件名模板。默认为`'{subset}.txt'`。
			
 
				+- `--store_abs_path`：若指定此选项，则在file list中保存绝对路径，否则保存相对路径。
			
 
				+- `--sep`：file list中使用的分隔符，默认为` `（空格）。
			
--- a/docs/data/tools_en.md
+++ b/docs/data/tools_en.md
@@ -4,14 +4,15 @@
 
				 
			
 
				 PaddleRS provides a rich set of remote sensing image processing tools in the `tools` directory, including:
			
 
				 
			
 
				-- `coco2mask.py`: Convert COCO annotation files to .png files.
			
 
				-- `mask2shape.py`: Convert .png format raster labels from model inference output to .shp vector format.
			
 
				+- `coco2mask.py`: Convert COCO annotation files to PNG files.
			
 
				+- `mask2shape.py`: Convert PNG format raster labels from model inference output to .shp vector format.
			
 
				 - `geojson2mask.py`: Convert GeoJSON format labels to .tif raster format.
			
 
				 - `match.py`: Implement registration of two images.
			
 
				 - `split.py`: Split large image into tiles.
			
 
				 - `coco_tools/`: A collection of COCO tools for processing COCO format annotation files.
			
 
				 - `prepare_dataset/`: A collection of scripts for preprocessing datasets.
			
 
				 - `extract_ms_patches.py`: Extract multi-scale image blocks from entire remote sensing images.
			
 
				+- `generate_file_lists.py`: Generate file lists.
			
 
				 
			
 
				 ## Usage
			
 
				 
			
@@ -23,7 +24,7 @@ cd tools
 
				 
			
 
				 ### coco2mask
			
 
				 
			
 
				-The main function of `coco2mask.py` is to convert images and corresponding COCO-formatted segmentation labels into images and labels in .png format, which are stored separately in the `img` and `gt` directories. The relevant data examples can be found in the [Chinese Typical City Building Instance Dataset](https://www.scidb.cn/detail?dataSetId=806674532768153600&dataSetType=journal). For the masks, the saved result is a single-channel pseudo-color image. The usage is as follows:
			
 
				+The main function of `coco2mask.py` is to convert images and corresponding COCO-formatted segmentation labels into images and labels in PNG format, which are stored separately in the `img` and `gt` directories. The relevant data examples can be found in the [Chinese Typical City Building Instance Dataset](https://www.scidb.cn/detail?dataSetId=806674532768153600&dataSetType=journal). For the masks, the saved result is a single-channel pseudo-color image. The usage is as follows:
			
 
				 
			
 
				 ```shell
			
 
				 python coco2mask.py --raw_dir {input directory path} --save_dir {output directory path}
			
@@ -31,12 +32,12 @@ python coco2mask.py --raw_dir {input directory path} --save_dir {output director
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `raw_dir`: Directory where the raw data are stored. Images are stored in the `images` subdirectory, and labels are saved in the `xxx.json` format.
			
 
				-- `save_dir`: Directory where the output results are saved. Images are saved in the `img` subdirectory, and .png format labels are saved in the `gt` subdirectory.
			
 
				+- `--raw_dir`: Directory where the raw data are stored. Images are stored in the `images` subdirectory, and labels are saved in the `xxx.json` format.
			
 
				+- `--save_dir`: Directory where the output results are saved. Images are saved in the `img` subdirectory, and PNG format labels are saved in the `gt` subdirectory.
			
 
				 
			
 
				 ### mask2shape
			
 
				 
			
 
				-The main function of `mask2shape.py` is to convert the segmentation results in .png format into shapefile format (vector graphics). The usage is as follows:
			
 
				+The main function of `mask2shape.py` is to convert the segmentation results in PNG format into shapefile format (vector graphics). The usage is as follows:
			
 
				 
			
 
				 ```shell
			
 
				 python mask2shape.py --src_img_path {path to the original image with geographic information} --mask_path {path to segmentation mask} [--save_path {path to save the output vector graphics}] [--ignore_index {index value to be ignored}]
			
@@ -44,10 +45,10 @@ python mask2shape.py --src_img_path {path to the original image with geographic
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `src_img_path`: Path to the original image with geographic information, which is required to provide the shapefile with geoprojection coordinate system information.
			
 
				-- `mask_path`: Path to the .png format segmentation result obtained by the model inference.
			
 
				-- `save_path`: Path to save the shapefile. The default value is `output`.
			
 
				-- `ignore_index`: Index value to be ignored in the shapefile, such as the background class ID in segmentation tasks. The default value is `255`.
			
 
				+- `--src_img_path`: Path to the original image with geographic information, which is required to provide the shapefile with geoprojection coordinate system information.
			
 
				+- `--mask_path`: Path to the PNG format segmentation result obtained by the model inference.
			
 
				+- `--save_path`: Path to save the shapefile. The default value is `output`.
			
 
				+- `--ignore_index`: Index value to be ignored in the shapefile, such as the background class ID in segmentation tasks. The default value is `255`.
			
 
				 
			
 
				 ### geojson2mask
			
 
				 
			
@@ -59,9 +60,9 @@ python geojson2mask.py --src_img_path {path to the original image with geographi
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `src_img_path`: Path to the original image file that contains the geospatial information.
			
 
				-- `geojson_path`: Path to the GeoJSON format label file.
			
 
				-- `save_path`: Path to save the converted raster file.
			
 
				+- `--src_img_path`: Path to the original image file that contains the geospatial information.
			
 
				+- `--geojson_path`: Path to the GeoJSON format label file.
			
 
				+- `--save_path`: Path to save the converted raster file.
			
 
				 
			
 
				 ### match
			
 
				 
			
@@ -73,11 +74,11 @@ python match.py --image1_path {path to temporal image 1} --image2_path {path to
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `image1_path`: File path of the first temporal image. This image must contain geospatial information and will be used as the reference image during the registration process.
			
 
				-- `image2_path`: File path of the second temporal image. The geospatial information of this image will not be used. This image will be registered to the first temporal image.
			
 
				-- `image1_bands`: Bands of the first temporal image used for registration, specified as three channels (representing R, G, and B) or a single channel. Default is `[1, 2, 3]`.
			
 
				-- `image2_bands`: Bands of the second temporal image used for registration, specified as three channels (representing R, G, and B) or a single channel. Default is `[1, 2, 3]`.
			
 
				-- `save_path`: Output file path of the registered image.
			
 
				+- `--image1_path`: File path of the first temporal image. This image must contain geospatial information and will be used as the reference image during the registration process.
			
 
				+- `--image2_path`: File path of the second temporal image. The geospatial information of this image will not be used. This image will be registered to the first temporal image.
			
 
				+- `--image1_bands`: Bands of the first temporal image used for registration, specified as three channels (representing R, G, and B) or a single channel. Default is `[1, 2, 3]`.
			
 
				+- `--image2_bands`: Bands of the second temporal image used for registration, specified as three channels (representing R, G, and B) or a single channel. Default is `[1, 2, 3]`.
			
 
				+- `--save_path`: Output file path of the registered image.
			
 
				 
			
 
				 ### split
			
 
				 
			
@@ -89,10 +90,10 @@ python split.py --image_path {input image path} [--mask_path {ground-truth label
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `image_path`: Path of the image to be split.
			
 
				-- `mask_path`: Path of the ground-truth label image to be split together. Default is `None`.
			
 
				-- `block_size`: Size of the image blocks. Default is `512`.
			
 
				-- `save_dir`: Directory to save the cropped image blocks. Default is `output`.
			
 
				+- `--image_path`: Path of the image to be split.
			
 
				+- `--mask_path`: Path of the ground-truth label image to be split together. Default is `None`.
			
 
				+- `--block_size`: Size of the image blocks. Default is `512`.
			
 
				+- `--save_dir`: Directory to save the cropped image blocks. Default is `output`.
			
 
				 
			
 
				 ### coco_tools
			
 
				 
			
@@ -138,17 +139,36 @@ python extract_ms_patches.py --image_paths {one or more input image paths} --mas
 
				 
			
 
				 Among them:
			
 
				 
			
 
				-- `image_paths`: Path of the source image(s). Multiple paths can be specified.
			
 
				-- `mask_path`: Path to the ground-truth label.
			
 
				-- `save_dir`: Path to the directory to save the split result. Default is `output`.
			
 
				-- `min_patch_size`: Minimum size of the extracted image block (in terms of the number of pixels in the height/width of the image block). This is the minimum area covered by a leaf node in the quadtree. Default is `256`.
			
 
				-- `bg_class`: Category ID of the background class. Default is `0`.
			
 
				-- `target_class`: Category ID of the target class. If it is `None`, it means that all classes except the background class are target classes. Default is `None`.
			
 
				-- `max_level`: Maximum scale level to retrieve. If it is `None`, it means that there is no limit to the scale level. Default is `None`.
			
 
				-- `include_bg`: If specified, also save the image blocks that only contain the background class and do not contain the target class.
			
 
				+- `--image_paths`: Path of the source image(s). Multiple paths can be specified.
			
 
				+- `--mask_path`: Path to the ground-truth label.
			
 
				+- `--save_dir`: Path to the directory to save the split result. Default is `output`.
			
 
				+- `--min_patch_size`: Minimum size of the extracted image block (in terms of the number of pixels in the height/width of the image block). This is the minimum area covered by a leaf node in the quadtree. Default is `256`.
			
 
				+- `--bg_class`: Category ID of the background class. Default is `0`.
			
 
				+- `--target_class`: Category ID of the target class. If it is `None`, it means that all classes except the background class are target classes. Default is `None`.
			
 
				+- `--max_level`: Maximum scale level to retrieve. If it is `None`, it means that there is no limit to the scale level. Default is `None`.
			
 
				+- `--include_bg`: If specified, also save the image blocks that only contain the background class and do not contain the target class.
			
 
				 - `--nonzero_ratio`: Specify a threshold. For any source image, if the ratio of nonzero pixels in the image block is less than this threshold, the image block will be discarded. If it is `None`, no filtering will be performed. Default is `None`.
			
 
				 - `--visualize`: If specified, the image `./vis_quadtree.png` will be generated, which visualizes the nodes in the quadtree. An example is shown in the following figure:
			
 
				 
			
 
				 <div align="center">
			
 
				 <img src="https://user-images.githubusercontent.com/21275753/189264850-f94b3d7b-c631-47b1-9833-0800de2ccf54.png"  width = "400" />  
			
 
				 </div>
			
 
				+
			
 
				+### generate_file_lists
			
 
				+
			
 
				+The main function of `generate_file_lists.py` is to generate file lists that contain the image and label paths of a dataset. The usage is as follows:
			
 
				+
			
 
				+```shell
			
 
				+python generate_file_lists.py --data_dir {root directory of dataset} --save_dir {output directory} [--subsets {names of subsets}] [--subdirs {names of subdirectories}] [--glob_pattern {glob pattern used to match image files}] [--file_list_pattern {pattern to name the file lists}] [--store_abs_path] [--sep {delimiter to use in file lists}]
			
 
				+```
			
 
				+
			
 
				+Among them:
			
 
				+
			
 
				+- `--data_dir`: Root directory of the dataset.
			
 
				+- `--save_dir`: Directory to save the generated file lists.
			
 
				+- `--subsets`: Names of subsets. Images should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` (when `--subsets` is not specified), where `subset` is one of the values in `--subsets`. Example: `--subsets train val test`.
			
 
				+- `--subdirs`: Names of subdirectories. Images should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` (when `--subsets` is not specified), where `subdir` is one of the values in `--subdirs`. Defaults to `('images', 'masks')`.
			
 
				+- `--glob_pattern`: Glob pattern used to match image files. Defaults to `*`, which matches any file.
			
 
				+- `--file_list_pattern`: Pattern to name the file lists. Defaults to `{subset}.txt`.
			
 
				+- `--store_abs_path`: If specified, store the absolute path rather than the relative path in file lists.
			
 
				+- `--sep`: Delimiter to use when writing lines to file lists. Defaults to ` ` (a space).
			
--- a/tests/tools/run_generate_file_lists.py
+++ b/tests/tools/run_generate_file_lists.py
@@ -0,0 +1,20 @@
 
				+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from testing_utils import run_script
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    run_script(
			
 
				+        f"python generate_file_lists.py --data_dir ../tests/data/levircd_crop --save_dir ../tests/data/levircd_crop --subsets train val test --subdirs A B label --glob_pattern '*' --store_abs_path",
			
 
				+        wd="../tools")
			
--- a/tools/coco2mask.py
+++ b/tools/coco2mask.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python

			
 
				+

			
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

			
 
				 #

			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");

			
--- a/tools/extract_ms_patches.py
+++ b/tools/extract_ms_patches.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
--- a/tools/generate_file_lists.py
+++ b/tools/generate_file_lists.py
@@ -0,0 +1,129 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+import argparse
			
 
				+import os.path as osp
			
 
				+
			
 
				+from prepare_dataset.common import get_path_tuples, create_file_list
			
 
				+
			
 
				+
			
 
				+def gen_file_lists(
			
 
				+        data_dir,
			
 
				+        save_dir,
			
 
				+        subsets=None,
			
 
				+        subdirs=('images', 'masks'),
			
 
				+        glob_pattern='*',
			
 
				+        file_list_pattern="{subset}.txt",
			
 
				+        store_abs_path=False,
			
 
				+        sep=' ', ):
			
 
				+    """
			
 
				+    Generate file lists.
			
 
				+
			
 
				+    Args:
			
 
				+        data_dir (str): Root directory of the dataset.
			
 
				+        save_dir (str): Directory to save the generated file lists.
			
 
				+        subsets (tuple|list|None, optional): List or tuple of names of subsets or None. 
			
 
				+            Images should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` 
			
 
				+            (when `subsets` is set to None), where `subset` is an element of `subsets`. 
			
 
				+            Defaults to None.
			
 
				+        subdirs (tuple|list, optional): List or tuple of names of subdirectories. Images
			
 
				+            should be stored in `data_dir/subset/subdir/` or `data_dir/subdir/` (when 
			
 
				+            `subsets` is set to None), where `subdir` is an element of `subdirs`. 
			
 
				+            Defaults to ('images', 'masks').
			
 
				+        glob_pattern (str, optional): Glob pattern used to match image files. Defaults 
			
 
				+            to '*', which matches any file.
			
 
				+        file_list_pattern (str, optional): Pattern to name the file lists. Defaults to 
			
 
				+            '{subset}.txt'.
			
 
				+        store_abs_path (bool, optional):  Whether to store the absolute path in file 
			
 
				+            lists. Defaults to False, which indicates storing the relative path.
			
 
				+        sep (str, optional): Delimiter to use when writing lines to file lists.
			
 
				+            Defaults to ' '.
			
 
				+    """
			
 
				+    if subsets is None:
			
 
				+        subsets = ('', )
			
 
				+    for subset in subsets:
			
 
				+        path_tuples = get_path_tuples(
			
 
				+            *(osp.join(data_dir, subset, subdir) for subdir in subdirs),
			
 
				+            glob_pattern=glob_pattern,
			
 
				+            data_dir=data_dir)
			
 
				+        if store_abs_path:
			
 
				+            path_tuples_new = []
			
 
				+            for path_tuple in path_tuples:
			
 
				+                path_tuple_new = [
			
 
				+                    osp.abspath(osp.join(data_dir, path_t))
			
 
				+                    for path_t in path_tuple
			
 
				+                ]
			
 
				+                path_tuples_new.append(tuple(path_tuple_new))
			
 
				+            path_tuples = path_tuples_new
			
 
				+
			
 
				+        if len(subset) > 0:
			
 
				+            file_list_name = file_list_pattern.format(subset=subset)
			
 
				+        else:
			
 
				+            file_list_name = 'list.txt'
			
 
				+        file_list = osp.join(save_dir, file_list_name)
			
 
				+        create_file_list(file_list, path_tuples, sep)
			
 
				+        print(f"File list {file_list} created.")
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    parser = argparse.ArgumentParser()
			
 
				+    parser.add_argument(
			
 
				+        '--data_dir', type=str, help="Root directory of the dataset.")
			
 
				+    parser.add_argument(
			
 
				+        '--save_dir',
			
 
				+        type=str,
			
 
				+        default='./',
			
 
				+        help="Directory to save the generated file lists.")
			
 
				+    parser.add_argument(
			
 
				+        '--subsets',
			
 
				+        nargs="*",
			
 
				+        default=None,
			
 
				+        help="List or tuple of names of subsets.", )
			
 
				+    parser.add_argument(
			
 
				+        '--subdirs',
			
 
				+        nargs="*",
			
 
				+        default=['A', 'B', 'label'],
			
 
				+        help="List or tuple of names of subdirectories of subsets.", )
			
 
				+    parser.add_argument(
			
 
				+        '--glob_pattern',
			
 
				+        type=str,
			
 
				+        default='*',
			
 
				+        help="Glob pattern used to match image files.", )
			
 
				+    parser.add_argument(
			
 
				+        '--file_list_pattern',
			
 
				+        type=str,
			
 
				+        default='{subset}.txt',
			
 
				+        help="Pattern to name the file lists.", )
			
 
				+    parser.add_argument(
			
 
				+        '--store_abs_path',
			
 
				+        action='store_true',
			
 
				+        help='Whether to store the absolute path in file lists.', )
			
 
				+    parser.add_argument(
			
 
				+        '--sep',
			
 
				+        type=str,
			
 
				+        default=' ',
			
 
				+        help="Delimiter to use when writing lines to file lists.", )
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    gen_file_lists(
			
 
				+        data_dir=args.data_dir,
			
 
				+        save_dir=args.save_dir,
			
 
				+        subsets=args.subsets,
			
 
				+        subdirs=args.subdirs,
			
 
				+        glob_pattern=args.glob_pattern,
			
 
				+        file_list_pattern=args.file_list_pattern,
			
 
				+        store_abs_path=args.store_abs_path,
			
 
				+        sep=args.sep, )
			
--- a/tools/geojson2mask.py
+++ b/tools/geojson2mask.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
--- a/tools/mask2shape.py
+++ b/tools/mask2shape.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
			
 
				 #
			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");
			
--- a/tools/match.py
+++ b/tools/match.py
@@ -1,3 +1,5 @@
 
				+#!/usr/bin/env python

			
 
				+

			
 
				 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

			
 
				 #

			
 
				 # Licensed under the Apache License, Version 2.0 (the "License");

			
--- a/tools/prepare_dataset/common.py
+++ b/tools/prepare_dataset/common.py
@@ -1,3 +1,17 @@
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				 import argparse
			
 
				 import random
			
 
				 import copy
			
--- a/tools/prepare_dataset/prepare_isaid.py
+++ b/tools/prepare_dataset/prepare_isaid.py
@@ -1,5 +1,19 @@
 
				 #!/usr/bin/env python
			
 
				 
			
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				 import os.path as osp
			
 
				 from glob import glob
			
 
				 
			
--- a/tools/prepare_dataset/prepare_levircd.py
+++ b/tools/prepare_dataset/prepare_levircd.py
@@ -1,5 +1,19 @@
 
				 #!/usr/bin/env python
			
 
				 
			
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				 import os.path as osp
			
 
				 
			
 
				 from common import (get_default_parser, add_crop_options, crop_patches,
			
--- a/tools/prepare_dataset/prepare_svcd.py
+++ b/tools/prepare_dataset/prepare_svcd.py
@@ -1,5 +1,19 @@
 
				 #!/usr/bin/env python
			
 
				 
			
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				 import os.path as osp
			
 
				 
			
 
				 from common import (get_default_parser, get_path_tuples, create_file_list,
			
--- a/tools/prepare_dataset/prepare_ucmerced.py
+++ b/tools/prepare_dataset/prepare_ucmerced.py
@@ -1,5 +1,19 @@
 
				 #!/usr/bin/env python
			
 
				 
			
 
				+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				 import random
			
 
				 import os.path as osp
			
 
				 from glob import iglob