소스 검색

Fix adding a segment when the dataset and document are empty (#3021)

Co-authored-by: jyong <jyong@dify.ai>
Jyong 1 년 전
부모
커밋
a6cd0f0e73

+ 10 - 0
api/core/rag/datasource/vdb/milvus/milvus_vector.py

@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
             utility.drop_collection(self._collection_name, None, using=alias)
 
     def text_exists(self, id: str) -> bool:
+        alias = uuid4().hex
+        if self._client_config.secure:
+            uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
+        else:
+            uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
+        connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
+
+        from pymilvus import utility
+        if not utility.has_collection(self._collection_name, using=alias):
+            return False
 
         result = self._client.query(collection_name=self._collection_name,
                                     filter=f'metadata["doc_id"] == "{id}"',

+ 7 - 0
api/core/rag/datasource/vdb/qdrant/qdrant_vector.py

@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
             )
 
     def text_exists(self, id: str) -> bool:
+        all_collection_name = []
+        collections_response = self._client.get_collections()
+        collection_list = collections_response.collections
+        for collection in collection_list:
+            all_collection_name.append(collection.name)
+        if self._collection_name not in all_collection_name:
+            return False
         response = self._client.retrieve(
             collection_name=self._collection_name,
             ids=[id]

+ 2 - 2
api/core/rag/datasource/vdb/vector_factory.py

@@ -128,8 +128,8 @@ class Vector:
         if kwargs.get('duplicate_check', False):
             documents = self._filter_duplicate_texts(documents)
         embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
-        self._vector_processor.add_texts(
-            documents=documents,
+        self._vector_processor.create(
+            texts=documents,
             embeddings=embeddings,
             **kwargs
         )

+ 5 - 0
api/core/rag/datasource/vdb/weaviate/weaviate_vector.py

@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
 
     def text_exists(self, id: str) -> bool:
         collection_name = self._collection_name
+        schema = self._default_schema(self._collection_name)
+
+        # check whether the index already exists
+        if not self._client.schema.contains(schema):
+            return False
         result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
             "path": ["doc_id"],
             "operator": "Equal",