@@ -59,7 +59,7 @@ class IndexingRunner:
                     first()
 
                 # load file
-                text_docs = self._load_data(dataset_document)
+                text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')
 
                 # get splitter
                 splitter = self._get_splitter(processing_rule)
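
In the indexing run path, `_load_data` now receives whether the document's process rule is in `'automatic'` mode, so the loader can be chosen per document instead of being fixed. A minimal sketch of how the call site derives the flag, using hypothetical stand-in names (`ProcessRule`, `load_data`), since only the call site is visible in this hunk:

```python
# Hypothetical stand-ins; only the call-site change appears in the diff.
class ProcessRule:
    def __init__(self, mode: str):
        self.mode = mode  # 'automatic' or 'custom'

def load_data(document_id: str, automatic: bool) -> list:
    # stand-in for IndexingRunner._load_data; the real method dispatches
    # on data_source_type (see the @@ -459 hunk below)
    return [f"docs for {document_id} (automatic={automatic})"]

rule = ProcessRule(mode='automatic')
# the new second argument is just the boolean comparison from the diff
text_docs = load_data('doc-1', rule.mode == 'automatic')
```
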
@@ -113,15 +113,14 @@ class IndexingRunner:
             for document_segment in document_segments:
                 db.session.delete(document_segment)
             db.session.commit()
-
-            # load file
-            text_docs = self._load_data(dataset_document)
-
             # get the process rule
             processing_rule = db.session.query(DatasetProcessRule). \
                 filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id). \
                 first()
 
+            # load file
+            text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')
+
             # get splitter
             splitter = self._get_splitter(processing_rule)
 
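
This hunk also reorders the steps: the `_load_data` call moves below the process-rule query because it now depends on `processing_rule.mode`. Note that `.first()` can return `None`; a defensive guard (an assumption shown here, not part of the diff) would avoid an `AttributeError` on `processing_rule.mode`:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class DatasetProcessRule:
    id: str
    mode: str  # 'automatic' or 'custom'

# stand-in for db.session.query(DatasetProcessRule)...first()
_RULES = {'rule-1': DatasetProcessRule(id='rule-1', mode='automatic')}

def first_rule(rule_id: str) -> Optional[DatasetProcessRule]:
    return _RULES.get(rule_id)

processing_rule = first_rule('rule-1')
if processing_rule is None:  # guard is an assumption, not in the diff
    raise ValueError('dataset document has no process rule')
automatic = processing_rule.mode == 'automatic'
```
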
@@ -238,14 +237,15 @@ class IndexingRunner:
         preview_texts = []
         total_segments = 0
         for file_detail in file_details:
-            # load data from file
-            text_docs = FileExtractor.load(file_detail)
 
             processing_rule = DatasetProcessRule(
                 mode=tmp_processing_rule["mode"],
                 rules=json.dumps(tmp_processing_rule["rules"])
             )
 
+            # load data from file
+            text_docs = FileExtractor.load(file_detail, is_automatic=processing_rule.mode == 'automatic')
+
             # get splitter
             splitter = self._get_splitter(processing_rule)
 
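
The estimate/preview path gets the same reordering: `FileExtractor.load` moves after the in-memory `DatasetProcessRule` is built so it can pass `is_automatic`. A hedged sketch of the signature these call sites imply; the routing logic in the body is an assumption, since `FileExtractor` itself is not touched by this diff:

```python
from typing import List

class Document:
    def __init__(self, page_content: str):
        self.page_content = page_content

class FileExtractor:
    # Assumed shape: only the is_automatic keyword is confirmed by the
    # call sites in this diff; the backend choice below is illustrative.
    @classmethod
    def load(cls, upload_file, is_automatic: bool = False) -> List[Document]:
        backend = 'automatic' if is_automatic else 'default'
        return [Document(f'parsed {upload_file} with {backend} loader')]

docs = FileExtractor.load('report.pdf', is_automatic=True)
print(docs[0].page_content)
```
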
@@ -459,7 +459,7 @@ class IndexingRunner:
                 one_or_none()
 
             if file_detail:
-                text_docs = FileExtractor.load(file_detail, is_automatic=True)
+                text_docs = FileExtractor.load(file_detail, is_automatic=automatic)
         elif dataset_document.data_source_type == 'notion_import':
             loader = NotionLoader.from_document(dataset_document)
             text_docs = loader.load()
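
Inside `_load_data` itself, the previously hardcoded `is_automatic=True` becomes the `automatic` variable, closing the loop with the call sites above: upload-file extraction now honours each document's process-rule mode rather than always taking the automatic path. A sketch of the assumed post-change dispatch, with names simplified and the upload-file query replaced by a parameter:

```python
from typing import List, Optional

def load_docs(data_source_type: str, file_detail: Optional[str],
              automatic: bool) -> List[str]:
    # Assumed shape of _load_data after this change: the flag is threaded
    # through from the caller instead of the old hardcoded is_automatic=True.
    if data_source_type == 'upload_file' and file_detail:
        return [f'extract {file_detail} (is_automatic={automatic})']
    if data_source_type == 'notion_import':
        return ['load notion pages for the document']
    return []

# custom-mode documents no longer force the automatic extractor
assert load_docs('upload_file', 'a.pdf', False) == \
    ['extract a.pdf (is_automatic=False)']
```
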