|  | @@ -16,6 +16,7 @@ from core.rag.extractor.pdf_extractor import PdfExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.text_extractor import TextExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_doc_extractor import UnstructuredWordExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_eml_extractor import UnstructuredEmailExtractor
 | 
	
		
			
				|  |  | +from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
 | 
	
	
		
			
				|  | @@ -106,6 +107,8 @@ class ExtractProcessor:
 | 
	
		
			
				|  |  |                          extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  |                      elif file_extension == '.xml':
 | 
	
		
			
				|  |  |                          extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                    elif file_extension == '.epub':
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  |                      else:
 | 
	
		
			
				|  |  |                          # txt
 | 
	
		
			
				|  |  |                          extractor = UnstructuredTextExtractor(file_path, unstructured_api_url) if is_automatic \
 | 
	
	
		
			
				|  | @@ -123,6 +126,8 @@ class ExtractProcessor:
 | 
	
		
			
				|  |  |                          extractor = WordExtractor(file_path)
 | 
	
		
			
				|  |  |                      elif file_extension == '.csv':
 | 
	
		
			
				|  |  |                          extractor = CSVExtractor(file_path, autodetect_encoding=True)
 | 
	
		
			
				|  |  | +                    elif file_extension == '.epub':
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredEpubExtractor(file_path)
 | 
	
		
			
				|  |  |                      else:
 | 
	
		
			
				|  |  |                          # txt
 | 
	
		
			
				|  |  |                          extractor = TextExtractor(file_path, autodetect_encoding=True)
 |