3 anos atrás · b641b2fd93
--- a/docs/apis/infer.md
+++ b/docs/apis/infer.md
@@ -159,6 +159,7 @@ def slider_predict(self,
 
				                    invalid_value=255,
			
 
				                    merge_strategy='keep_last',
			
 
				                    batch_size=1,
			
 
				+                   eager_load=False,
			
 
				                    quiet=False):
			
 
				 ```
			
 
				 
			
@@ -174,6 +175,7 @@ def slider_predict(self,
 
				 |`invalid_value`|`int`|输出影像中用于标记无效像素的数值。|`255`|
			
 
				 |`merge_strategy`|`str`|合并滑窗重叠区域使用的策略。`'keep_first'`表示保留遍历顺序（从左至右，从上往下，列优先）最靠前的窗口的预测类别；`'keep_last'`表示保留遍历顺序最靠后的窗口的预测类别；`'accum'`表示通过将各窗口在重叠区域给出的预测概率累加，计算最终预测类别。需要注意的是，在对大尺寸影像进行`overlap`较大的密集推理时，使用`'accum'`策略可能导致较长的推理时间，但一般能够在窗口交界部分取得更好的表现。|`'keep_last'`|
			
 
				 |`batch_size`|`int`|预测时使用的mini-batch大小。|`1`|
			
 
				+|`eager_load`|`bool`|若为`True`，则不使用延迟内存载入，而是在预测开始时一次性将整幅影像载入到内存。|`False`|
			
 
				 |`quiet`|`bool`|若为`True`，不显示预测进度。|`False`|
			
 
				 
			
 
				 变化检测任务的滑窗推理API与图像分割任务类似，但需要注意的是输出结果中存储的地理变换、投影等信息以从第一时相影像中读取的信息为准，存储滑窗推理结果的文件名也与第一时相影像文件相同。
			
--- a/paddlers/deploy/predictor.py
+++ b/paddlers/deploy/predictor.py
@@ -332,6 +332,7 @@ class Predictor(object):
 
				                        invalid_value=255,
			
 
				                        merge_strategy='keep_last',
			
 
				                        batch_size=1,
			
 
				+                       eager_load=False,
			
 
				                        quiet=False):
			
 
				         """
			
 
				         Do inference using sliding windows. Only semantic segmentation and change detection models are supported in the 
			
@@ -356,6 +357,7 @@ class Predictor(object):
 
				                 the last block in traversal order, respectively. 'accum' means determining the class of an overlapping pixel 
			
 
				                 according to accumulated probabilities. Defaults to 'keep_last'.
			
 
				             batch_size (int, optional): Batch size used in inference. Defaults to 1.
			
 
				+            eager_load (bool, optional): Whether to load the whole image(s) eagerly. Defaults to False.
			
 
				             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
			
 
				         """
			
 
				 
			
@@ -375,6 +377,7 @@ class Predictor(object):
 
				             invalid_value,
			
 
				             merge_strategy,
			
 
				             batch_size,
			
 
				+            eager_load,
			
 
				             not quiet)
			
 
				 
			
 
				     def batch_predict(self, image_list, **params):
			
--- a/paddlers/tasks/change_detector.py
+++ b/paddlers/tasks/change_detector.py
@@ -591,6 +591,7 @@ class BaseChangeDetector(BaseModel):
 
				                        invalid_value=255,
			
 
				                        merge_strategy='keep_last',
			
 
				                        batch_size=1,
			
 
				+                       eager_load=False,
			
 
				                        quiet=False):
			
 
				         """
			
 
				         Do inference using sliding windows.
			
@@ -615,12 +616,14 @@ class BaseChangeDetector(BaseModel):
 
				                 order, respectively. 'accum' means determining the class of an overlapping 
			
 
				                 pixel according to accumulated probabilities. Defaults to 'keep_last'.
			
 
				             batch_size (int, optional): Batch size used in inference. Defaults to 1.
			
 
				+            eager_load (bool, optional): Whether to load the whole image(s) eagerly.
			
 
				+                Defaults to False.
			
 
				             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
			
 
				         """
			
 
				 
			
 
				         slider_predict(self.predict, img_files, save_dir, block_size, overlap,
			
 
				                        transforms, invalid_value, merge_strategy, batch_size,
			
 
				-                       not quiet)
			
 
				+                       eager_load, not quiet)
			
 
				 
			
 
				     def preprocess(self, images, transforms, to_tensor=True):
			
 
				         self._check_transforms(transforms, 'test')
			
--- a/paddlers/tasks/segmenter.py
+++ b/paddlers/tasks/segmenter.py
@@ -557,6 +557,7 @@ class BaseSegmenter(BaseModel):
 
				                        invalid_value=255,
			
 
				                        merge_strategy='keep_last',
			
 
				                        batch_size=1,
			
 
				+                       eager_load=False,
			
 
				                        quiet=False):
			
 
				         """
			
 
				         Do inference using sliding windows.
			
@@ -581,12 +582,14 @@ class BaseSegmenter(BaseModel):
 
				                 order, respectively. 'accum' means determining the class of an overlapping 
			
 
				                 pixel according to accumulated probabilities. Defaults to 'keep_last'.
			
 
				             batch_size (int, optional): Batch size used in inference. Defaults to 1.
			
 
				+            eager_load (bool, optional): Whether to load the whole image(s) eagerly.
			
 
				+                Defaults to False.
			
 
				             quiet (bool, optional): If True, disable the progress bar. Defaults to False.
			
 
				         """
			
 
				 
			
 
				         slider_predict(self.predict, img_file, save_dir, block_size, overlap,
			
 
				                        transforms, invalid_value, merge_strategy, batch_size,
			
 
				-                       not quiet)
			
 
				+                       eager_load, not quiet)
			
 
				 
			
 
				     def preprocess(self, images, transforms, to_tensor=True):
			
 
				         self._check_transforms(transforms, 'test')
			
@@ -609,6 +612,8 @@ class BaseSegmenter(BaseModel):
 
				 
			
 
				     @staticmethod
			
 
				     def get_transforms_shape_info(batch_ori_shape, transforms):
			
 
				+        # TODO: Store transform meta info when applying transforms
			
 
				+        # and not here
			
 
				         batch_restore_list = list()
			
 
				         for ori_shape in batch_ori_shape:
			
 
				             restore_list = list()
			
--- a/paddlers/tasks/utils/slider_predict.py
+++ b/paddlers/tasks/utils/slider_predict.py
@@ -212,36 +212,63 @@ def assign_border_weights(array, weight=0.5, border_ratio=0.25, inplace=True):
 
				     return array
			
 
				 
			
 
				 
			
 
				-def read_block(ds,
			
 
				-               xoff,
			
 
				-               yoff,
			
 
				-               xsize,
			
 
				-               ysize,
			
 
				-               tar_xsize=None,
			
 
				-               tar_ysize=None,
			
 
				-               pad_val=0):
			
 
				-    if tar_xsize is None:
			
 
				-        tar_xsize = xsize
			
 
				-    if tar_ysize is None:
			
 
				-        tar_ysize = ysize
			
 
				-    # Read data from dataset
			
 
				-    block = ds.ReadAsArray(xoff, yoff, xsize, ysize)
			
 
				-    c, real_ysize, real_xsize = block.shape
			
 
				-    assert real_ysize == ysize and real_xsize == xsize
			
 
				-    # [c, h, w] -> [h, w, c]
			
 
				-    block = block.transpose((1, 2, 0))
			
 
				-    if (real_ysize, real_xsize) != (tar_ysize, tar_xsize):
			
 
				-        if real_ysize >= tar_ysize or real_xsize >= tar_xsize:
			
 
				-            raise ValueError
			
 
				-        padded_block = np.full(
			
 
				-            (tar_ysize, tar_xsize, c), fill_value=pad_val, dtype=block.dtype)
			
 
				-        # Fill
			
 
				-        padded_block[:real_ysize, :real_xsize] = block
			
 
				-        return padded_block
			
 
				-    else:
			
 
				class BlockReader(metaclass=ABCMeta):
				    """
				    Abstract base class of readers that fetch rectangular blocks of an image.

				    Subclasses implement `read_block()` to return the raw pixels of a window, 
				    while `get_block()` converts the raw block to channel-last layout and 
				    optionally pads it to a target size.

				    Args:
				        ds: Data source to read from. It is stored in `self.ds` and is only 
				            accessed by subclasses.
				    """

				    def __init__(self, ds):
				        super().__init__()
				        self.ds = ds

				    @abstractmethod
				    def read_block(self, xoff, yoff, xsize, ysize):
				        """
				        Read the raw pixels of a window.

				        Args:
				            xoff (int): Horizontal offset of the window.
				            yoff (int): Vertical offset of the window.
				            xsize (int): Width of the window.
				            ysize (int): Height of the window.

				        Returns:
				            np.ndarray: Block in [c, h, w] format. A 2-D [h, w] block is also 
				                accepted by `get_block()`.
				        """
				        pass

				    def get_block(self,
				                  xoff,
				                  yoff,
				                  xsize,
				                  ysize,
				                  tar_xsize=None,
				                  tar_ysize=None,
				                  pad_val=0):
				        """
				        Read a window and return it in [h, w, c] format, padded if required.

				        Args:
				            xoff (int): Horizontal offset of the window.
				            yoff (int): Vertical offset of the window.
				            xsize (int): Width of the window.
				            ysize (int): Height of the window.
				            tar_xsize (int|None, optional): Width of the returned block. If None, 
				                use `xsize`. Defaults to None.
				            tar_ysize (int|None, optional): Height of the returned block. If None, 
				                use `ysize`. Defaults to None.
				            pad_val (int, optional): Value used to fill the padded area. 
				                Defaults to 0.

				        Returns:
				            np.ndarray: Block in [h, w, c] format. When the target size is larger 
				                than the window, the data are placed at the top-left corner and 
				                the rest is filled with `pad_val`.

				        Raises:
				            ValueError: If the target size is smaller than the window size along 
				                any axis.
				        """
				        if tar_xsize is None:
				            tar_xsize = xsize
				        if tar_ysize is None:
				            tar_ysize = ysize
				        block = self.read_block(xoff, yoff, xsize, ysize)
				        if block.ndim == 2:
				            # Single-band data may come without a channel axis.
				            # [h, w] -> [1, h, w]
				            block = block[np.newaxis]
				        c, real_ysize, real_xsize = block.shape
				        assert real_ysize == ysize and real_xsize == xsize, \
				            "The size of the block that was read does not match the requested size."
				        # [c, h, w] -> [h, w, c]
				        block = block.transpose((1, 2, 0))
				        if (real_ysize, real_xsize) == (tar_ysize, tar_xsize):
				            return block
				        # Padding along a single axis is valid, so only a strictly larger 
				        # block is rejected.
				        if real_ysize > tar_ysize or real_xsize > tar_xsize:
				            raise ValueError(
				                "The target size ({}, {}) is smaller than the block size ({}, {}).".
				                format(tar_ysize, tar_xsize, real_ysize, real_xsize))
				        padded_block = np.full(
				            (tar_ysize, tar_xsize, c),
				            fill_value=pad_val,
				            dtype=block.dtype)
				        # Fill
				        padded_block[:real_ysize, :real_xsize] = block
				        return padded_block
			
 
				+
			
 
				+
			
 
				class GDALLazyBlockReader(BlockReader):
				    """
				    Block reader that fetches every window from the data source on demand.
				    """

				    def read_block(self, xoff, yoff, xsize, ysize):
				        # Delegate to the windowed read of the data source. Nothing is cached here.
				        return self.ds.ReadAsArray(xoff, yoff, xsize, ysize)
			
 
				 
			
 
				 
			
 
				class EagerBlockReader(BlockReader):
				    """
				    Block reader that loads the whole image into memory on construction and 
				    serves every window by slicing the cached array.

				    Args:
				        ds: Data source whose `ReadAsArray()` method returns the whole image.
				    """

				    def __init__(self, ds):
				        super().__init__(ds)
				        # Read the whole image eagerly
				        whole_image = self.ds.ReadAsArray()
				        if whole_image.ndim == 2:
				            # A 2-D result has no channel axis (presumably a single-band 
				            # image), which would break the 3-index slicing in `read_block()`.
				            # [h, w] -> [1, h, w]
				            whole_image = whole_image[np.newaxis]
				        self._whole_image = whole_image

				    def read_block(self, xoff, yoff, xsize, ysize):
				        # First dim is channel
				        return self._whole_image[:, yoff:yoff + ysize, xoff:xoff + xsize]
			
 
				+
			
 
				+
			
 
				 def slider_predict(predict_func,
			
 
				                    img_file,
			
 
				                    save_dir,
			
@@ -251,6 +278,7 @@ def slider_predict(predict_func,
 
				                    invalid_value,
			
 
				                    merge_strategy,
			
 
				                    batch_size,
			
 
				+                   eager_load=False,
			
 
				                    show_progress=False):
			
 
				     """
			
 
				     Do inference using sliding windows.
			
@@ -275,10 +303,19 @@ def slider_predict(predict_func,
 
				             traversal order, respectively. 'accum' means determining the class 
			
 
				             of an overlapping pixel according to accumulated probabilities.
			
 
				         batch_size (int): Batch size used in inference.
			
 
				+        eager_load (bool, optional): Whether to load the whole image(s) eagerly.
			
 
				+            Defaults to False.
			
 
				         show_progress (bool, optional): Whether to show prediction progress with a 
			
 
				             progress bar. Defaults to False.
			
 
				     """
			
 
				 
			
 
				+    def _construct_reader(eager_load, *args, **kwargs):
			
 
				+        if eager_load:
			
 
				+            reader = EagerBlockReader(*args, **kwargs)
			
 
				+        else:
			
 
				+            reader = GDALLazyBlockReader(*args, **kwargs)
			
 
				+        return reader
			
 
				+
			
 
				     try:
			
 
				         from osgeo import gdal
			
 
				     except:
			
@@ -311,11 +348,14 @@ def slider_predict(predict_func,
 
				             raise ValueError("Tuple `img_file` must have the length of two.")
			
 
				         # Assume that two input images have the same size
			
 
				         src_data = gdal.Open(img_file[0])
			
 
				+        reader = _construct_reader(eager_load=eager_load, ds=src_data)
			
 
				         src2_data = gdal.Open(img_file[1])
			
 
				+        reader2 = _construct_reader(eager_load=eager_load, ds=src2_data)
			
 
				         # Output name is the same as the name of the first image
			
 
				         file_name = osp.basename(osp.normpath(img_file[0]))
			
 
				     else:
			
 
				         src_data = gdal.Open(img_file)
			
 
				+        reader = _construct_reader(eager_load=eager_load, ds=src_data)
			
 
				         file_name = osp.basename(osp.normpath(img_file))
			
 
				 
			
 
				     # Get size of original raster
			
@@ -395,10 +435,10 @@ def slider_predict(predict_func,
 
				                 is_end_of_col = False
			
 
				 
			
 
				             # Read
			
 
				-            im = read_block(src_data, xoff, yoff, xsize, ysize)
			
 
				+            im = reader.get_block(xoff, yoff, xsize, ysize)
			
 
				 
			
 
				             if isinstance(img_file, tuple):
			
 
				-                im2 = read_block(src2_data, xoff, yoff, xsize, ysize)
			
 
				+                im2 = reader2.get_block(xoff, yoff, xsize, ysize)
			
 
				                 batch_data.append((im, im2))
			
 
				             else:
			
 
				                 batch_data.append(im)
			
@@ -423,7 +463,6 @@ def slider_predict(predict_func,
 
				                     # Write to file
			
 
				                     band.WriteArray(pred, xoff_, yoff_)
			
 
				 
			
 
				-                dst_data.FlushCache()
			
 
				                 batch_data.clear()
			
 
				                 batch_offsets.clear()
			
 
				 
			
@@ -433,6 +472,9 @@ def slider_predict(predict_func,
 
				                 pb.update(1)
			
 
				                 pb.set_description("{} out of {} blocks processed.".format(
			
 
				                     cnt, num_blocks))
			
 
				+        # Flush cache when finishing each row
			
 
				+        dst_data.FlushCache()
			
 
				 
			
 
				+    dst_data.FlushCache()
			
 
				     dst_data = None
			
 
				     logging.info("GeoTiff file saved in {}.".format(save_file))
			
--- a/tests/tasks/test_slider_predict.py
+++ b/tests/tasks/test_slider_predict.py
@@ -72,6 +72,33 @@ class _TestSliderPredictNamespace:
 
				                     self.model.slider_predict(self.image_path, save_dir, 512, 0,
			
 
				                                               self.transforms)
			
 
				 
			
 
				+        def test_eager_load(self):
			
 
				+            with tempfile.TemporaryDirectory() as td:
			
 
				+                # Lazy
			
 
				+                save_dir = osp.join(td, 'lazy')
			
 
				+                self.model.slider_predict(self.image_path, save_dir, 128, 64,
			
 
				+                                          self.transforms)
			
 
				+                pred_lazy = T.decode_image(
			
 
				+                    osp.join(save_dir, self.basename),
			
 
				+                    read_raw=True,
			
 
				+                    decode_sar=False)
			
 
				+
			
 
				+                # Eager
			
 
				+                save_dir = osp.join(td, 'eager')
			
 
				+                self.model.slider_predict(
			
 
				+                    self.image_path,
			
 
				+                    save_dir,
			
 
				+                    128,
			
 
				+                    64,
			
 
				+                    self.transforms,
			
 
				+                    eager_load=True)
			
 
				+                pred_eager = T.decode_image(
			
 
				+                    osp.join(save_dir, self.basename),
			
 
				+                    read_raw=True,
			
 
				+                    decode_sar=False)
			
 
				+
			
 
				+                self.check_output_equal(pred_lazy, pred_eager)
			
 
				+
			
 
				         def test_merge_strategy(self):
			
 
				             with tempfile.TemporaryDirectory() as td:
			
 
				                 # Whole-image inference using predict()