|  | @@ -10,7 +10,7 @@ from core.rag.models.document import Document
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  class WordExtractor(BaseExtractor):
 |  |  class WordExtractor(BaseExtractor):
 | 
											
												
													
														|  | -    """Load pdf files.
 |  | 
 | 
											
												
													
														|  | 
 |  | +    """Load docx files.
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      Args:
 |  |      Args:
 | 
											
										
											
												
													
														|  | @@ -46,14 +46,16 @@ class WordExtractor(BaseExtractor):
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      def extract(self) -> list[Document]:
 |  |      def extract(self) -> list[Document]:
 | 
											
												
													
														|  |          """Load given path as single page."""
 |  |          """Load given path as single page."""
 | 
											
												
													
														|  | -        import docx2txt
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -        return [
 |  | 
 | 
											
												
													
														|  | -            Document(
 |  | 
 | 
											
												
													
														|  | -                page_content=docx2txt.process(self.file_path),
 |  | 
 | 
											
												
													
														|  | -                metadata={"source": self.file_path},
 |  | 
 | 
											
												
													
														|  | -            )
 |  | 
 | 
											
												
													
														|  | -        ]
 |  | 
 | 
											
												
													
														|  | 
 |  | +        from docx import Document as docx_Document
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        document = docx_Document(self.file_path)
 | 
											
												
													
														|  | 
 |  | +        doc_texts = [paragraph.text for paragraph in document.paragraphs]
 | 
											
												
													
														|  | 
 |  | +        content = '\n'.join(doc_texts)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        return [Document(
 | 
											
												
													
														|  | 
 |  | +            page_content=content,
 | 
											
												
													
														|  | 
 |  | +            metadata={"source": self.file_path},
 | 
											
												
													
														|  | 
 |  | +        )]
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      @staticmethod
 |  |      @staticmethod
 | 
											
												
													
														|  |      def _is_valid_url(url: str) -> bool:
 |  |      def _is_valid_url(url: str) -> bool:
 |