| 
					
				 | 
			
			
				@@ -59,7 +59,7 @@ class IndexingRunner: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     first() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # load file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                text_docs = self._load_data(dataset_document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # get splitter 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 splitter = self._get_splitter(processing_rule) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -113,15 +113,14 @@ class IndexingRunner: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for document_segment in document_segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 db.session.delete(document_segment) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # load file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            text_docs = self._load_data(dataset_document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # get the process rule 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             processing_rule = db.session.query(DatasetProcessRule). \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id). \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 first() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # load file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # get splitter 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             splitter = self._get_splitter(processing_rule) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -238,14 +237,15 @@ class IndexingRunner: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         preview_texts = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         total_segments = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for file_detail in file_details: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # load data from file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            text_docs = FileExtractor.load(file_detail) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             processing_rule = DatasetProcessRule( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 mode=tmp_processing_rule["mode"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 rules=json.dumps(tmp_processing_rule["rules"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # load data from file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            text_docs = FileExtractor.load(file_detail, is_automatic=processing_rule.mode == 'automatic') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # get splitter 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             splitter = self._get_splitter(processing_rule) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -459,7 +459,7 @@ class IndexingRunner: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 one_or_none() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if file_detail: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                text_docs = FileExtractor.load(file_detail, is_automatic=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                text_docs = FileExtractor.load(file_detail, is_automatic=automatic) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         elif dataset_document.data_source_type == 'notion_import': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             loader = NotionLoader.from_document(dataset_document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             text_docs = loader.load() 
			 |