|  | @@ -62,7 +62,8 @@ class IndexingRunner:
 | 
	
		
			
				|  |  |                  text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |                  # transform
 | 
	
		
			
				|  |  | -                documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
 | 
	
		
			
				|  |  | +                documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
 | 
	
		
			
				|  |  | +                                            processing_rule.to_dict())
 | 
	
		
			
				|  |  |                  # save segment
 | 
	
		
			
				|  |  |                  self._load_segments(dataset, dataset_document, documents)
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -120,7 +121,8 @@ class IndexingRunner:
 | 
	
		
			
				|  |  |              text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |              # transform
 | 
	
		
			
				|  |  | -            documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
 | 
	
		
			
				|  |  | +            documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
 | 
	
		
			
				|  |  | +                                        processing_rule.to_dict())
 | 
	
		
			
				|  |  |              # save segment
 | 
	
		
			
				|  |  |              self._load_segments(dataset, dataset_document, documents)
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -750,7 +752,7 @@ class IndexingRunner:
 | 
	
		
			
				|  |  |          index_processor.load(dataset, documents)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
 | 
	
		
			
				|  |  | -                   text_docs: list[Document], process_rule: dict) -> list[Document]:
 | 
	
		
			
				|  |  | +                   text_docs: list[Document], doc_language: str, process_rule: dict) -> list[Document]:
 | 
	
		
			
				|  |  |          # get embedding model instance
 | 
	
		
			
				|  |  |          embedding_model_instance = None
 | 
	
		
			
				|  |  |          if dataset.indexing_technique == 'high_quality':
 | 
	
	
		
			
				|  | @@ -768,7 +770,8 @@ class IndexingRunner:
 | 
	
		
			
				|  |  |                  )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
 | 
	
		
			
				|  |  | -                                              process_rule=process_rule)
 | 
	
		
			
				|  |  | +                                              process_rule=process_rule, tenant_id=dataset.tenant_id,
 | 
	
		
			
				|  |  | +                                              doc_language=doc_language)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          return documents
 | 
	
		
			
				|  |  |  
 |