| 
					
				 | 
			
			
				@@ -42,31 +42,42 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             ).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if dataset_documents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                documents = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                for dataset_document in dataset_documents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    # delete from vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    segments = db.session.query(DocumentSegment).filter( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        DocumentSegment.document_id == dataset_document.id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        DocumentSegment.enabled == True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    ) .order_by(DocumentSegment.position.asc()).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    for segment in segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        document = Document( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            page_content=segment.content, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            metadata={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "doc_id": segment.index_node_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "doc_hash": segment.index_node_hash, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "document_id": segment.document_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "dataset_id": segment.dataset_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                dataset_documents_ids = [doc.id for doc in dataset_documents] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                db.session.query(DatasetDocument).filter(DatasetDocument.id.in_(dataset_documents_ids)) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    .update({"indexing_status": "indexing"}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        documents.append(document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                for dataset_document in dataset_documents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        # add from vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        segments = db.session.query(DocumentSegment).filter( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            DocumentSegment.document_id == dataset_document.id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            DocumentSegment.enabled == True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        ) .order_by(DocumentSegment.position.asc()).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            documents = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            for segment in segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                document = Document( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    page_content=segment.content, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    metadata={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "doc_id": segment.index_node_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "doc_hash": segment.index_node_hash, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "document_id": segment.document_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "dataset_id": segment.dataset_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # save vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                index_processor.load(dataset, documents, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                documents.append(document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            # save vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            index_processor.load(dataset, documents, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            .update({"indexing_status": "completed"}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            .update({"indexing_status": "error", "error": str(e)}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         elif action == 'update': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # clean index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            index_processor.clean(dataset, None, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             dataset_documents = db.session.query(DatasetDocument).filter( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 DatasetDocument.dataset_id == dataset_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 DatasetDocument.indexing_status == 'completed', 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -75,28 +86,46 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             ).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # add new index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if dataset_documents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                documents = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # update document status 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                dataset_documents_ids = [doc.id for doc in dataset_documents] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                db.session.query(DatasetDocument).filter(DatasetDocument.id.in_(dataset_documents_ids)) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    .update({"indexing_status": "indexing"}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # clean index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                index_processor.clean(dataset, None, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 for dataset_document in dataset_documents: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    # delete from vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    segments = db.session.query(DocumentSegment).filter( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        DocumentSegment.document_id == dataset_document.id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        DocumentSegment.enabled == True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    ).order_by(DocumentSegment.position.asc()).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    for segment in segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        document = Document( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            page_content=segment.content, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            metadata={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "doc_id": segment.index_node_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "doc_hash": segment.index_node_hash, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "document_id": segment.document_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                "dataset_id": segment.dataset_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # update from vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        segments = db.session.query(DocumentSegment).filter( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            DocumentSegment.document_id == dataset_document.id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            DocumentSegment.enabled == True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        ).order_by(DocumentSegment.position.asc()).all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            documents = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            for segment in segments: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                document = Document( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    page_content=segment.content, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    metadata={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "doc_id": segment.index_node_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "doc_hash": segment.index_node_hash, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "document_id": segment.document_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                        "dataset_id": segment.dataset_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        documents.append(document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                documents.append(document) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            # save vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            index_processor.load(dataset, documents, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            .update({"indexing_status": "completed"}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document.id) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            .update({"indexing_status": "error", "error": str(e)}, synchronize_session=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        db.session.commit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # save vector index 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                index_processor.load(dataset, documents, with_keywords=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         end_at = time.perf_counter() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         logging.info( 
			 |