| 
					
				 | 
			
			
				@@ -6,12 +6,14 @@ import docx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pypdfium2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import yaml 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from unstructured.partition.api import partition_via_api 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from unstructured.partition.email import partition_email 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from unstructured.partition.epub import partition_epub 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from unstructured.partition.msg import partition_msg 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from unstructured.partition.ppt import partition_ppt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from unstructured.partition.pptx import partition_pptx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from configs import dify_config 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from core.file import File, FileTransferMethod, file_manager 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from core.helper import ssrf_proxy 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from core.variables import ArrayFileSegment 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -263,7 +265,14 @@ def _extract_text_from_ppt(file_content: bytes) -> str: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def _extract_text_from_pptx(file_content: bytes) -> str: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         with io.BytesIO(file_content) as file: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            elements = partition_pptx(file=file) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                elements = partition_via_api( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    file=file, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    api_url=dify_config.UNSTRUCTURED_API_URL, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    api_key=dify_config.UNSTRUCTURED_API_KEY, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                elements = partition_pptx(file=file) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return "\n".join([getattr(element, "text", "") for element in elements]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e 
			 |