|  | @@ -21,6 +21,7 @@ from core.rag.extractor.unstructured.unstructured_eml_extractor import Unstructu
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
 | 
	
		
			
				|  |  | +from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor
 | 
	
		
			
				|  |  |  from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor
 | 
	
	
		
			
				|  | @@ -102,10 +103,10 @@ class ExtractProcessor:
 | 
	
		
			
				|  |  |                      if file_extension in {".xlsx", ".xls"}:
 | 
	
		
			
				|  |  |                          extractor = ExcelExtractor(file_path)
 | 
	
		
			
				|  |  |                      elif file_extension == ".pdf":
 | 
	
		
			
				|  |  | -                        extractor = PdfExtractor(file_path)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension in {".md", ".markdown"}:
 | 
	
		
			
				|  |  |                          extractor = (
 | 
	
		
			
				|  |  | -                            UnstructuredMarkdownExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                            UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                              if is_automatic
 | 
	
		
			
				|  |  |                              else MarkdownExtractor(file_path, autodetect_encoding=True)
 | 
	
		
			
				|  |  |                          )
 | 
	
	
		
			
				|  | @@ -116,17 +117,17 @@ class ExtractProcessor:
 | 
	
		
			
				|  |  |                      elif file_extension == ".csv":
 | 
	
		
			
				|  |  |                          extractor = CSVExtractor(file_path, autodetect_encoding=True)
 | 
	
		
			
				|  |  |                      elif file_extension == ".msg":
 | 
	
		
			
				|  |  | -                        extractor = UnstructuredMsgExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredMsgExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension == ".eml":
 | 
	
		
			
				|  |  | -                        extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension == ".ppt":
 | 
	
		
			
				|  |  |                          extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension == ".pptx":
 | 
	
		
			
				|  |  | -                        extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension == ".xml":
 | 
	
		
			
				|  |  | -                        extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      elif file_extension == ".epub":
 | 
	
		
			
				|  |  | -                        extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url)
 | 
	
		
			
				|  |  | +                        extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url, unstructured_api_key)
 | 
	
		
			
				|  |  |                      else:
 | 
	
		
			
				|  |  |                          # txt
 | 
	
		
			
				|  |  |                          extractor = (
 |