Browse Source

Added a description for .ppt and specified why unstructured.io is required (#9452)

Co-authored-by: crazywoola <427733928@qq.com>
Zixuan Cheng 6 months ago
parent
commit
88dec6ef2b
1 changed file with 2 additions and 0 deletions
  1. 2 0
      api/core/rag/extractor/extract_processor.py

+ 2 - 0
api/core/rag/extractor/extract_processor.py

@@ -121,6 +121,8 @@ class ExtractProcessor:
                         extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url, unstructured_api_key)
                         extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url, unstructured_api_key)
                     elif file_extension == ".ppt":
                     elif file_extension == ".ppt":
                         extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key)
                         extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key)
+                        # NOTE: an API key must be configured first, because
+                        # unstructured_api_key is required to parse .ppt documents.
                     elif file_extension == ".pptx":
                     elif file_extension == ".pptx":
                         extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url, unstructured_api_key)
                         extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url, unstructured_api_key)
                     elif file_extension == ".xml":
                     elif file_extension == ".xml":