Browse Source

Generalize the generation of new collection names by dataset ID (#2620)

Bowen Liang 1 year ago
parent
commit
801d135390

+ 3 - 3
api/commands.py

@@ -150,7 +150,7 @@ def vdb_migrate():
                         continue
                 if vector_type == "weaviate":
                     dataset_id = dataset.id
-                    collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                     index_struct_dict = {
                         "type": 'weaviate',
                         "vector_store": {"class_prefix": collection_name}
@@ -167,7 +167,7 @@ def vdb_migrate():
                             raise ValueError('Dataset Collection Bindings is not exist!')
                     else:
                         dataset_id = dataset.id
-                        collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                        collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                     index_struct_dict = {
                         "type": 'qdrant',
                         "vector_store": {"class_prefix": collection_name}
@@ -176,7 +176,7 @@ def vdb_migrate():
 
                 elif vector_type == "milvus":
                     dataset_id = dataset.id
-                    collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                     index_struct_dict = {
                         "type": 'milvus',
                         "vector_store": {"class_prefix": collection_name}

+ 3 - 3
api/core/rag/datasource/vdb/vector_factory.py

@@ -39,7 +39,7 @@ class Vector:
                 collection_name = class_prefix
             else:
                 dataset_id = self._dataset.id
-                collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                 index_struct_dict = {
                     "type": 'weaviate',
                     "vector_store": {"class_prefix": collection_name}
@@ -70,7 +70,7 @@ class Vector:
                     collection_name = class_prefix
                 else:
                     dataset_id = self._dataset.id
-                    collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                    collection_name = Dataset.gen_collection_name_by_id(dataset_id)
 
             if not self._dataset.index_struct_dict:
                 index_struct_dict = {
@@ -96,7 +96,7 @@ class Vector:
                 collection_name = class_prefix
             else:
                 dataset_id = self._dataset.id
-                collection_name = "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+                collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                 index_struct_dict = {
                     "type": 'milvus',
                     "vector_store": {"class_prefix": collection_name}

+ 1 - 1
api/core/rag/datasource/vdb/weaviate/weaviate_vector.py

@@ -70,7 +70,7 @@ class WeaviateVector(BaseVector):
             return class_prefix
 
         dataset_id = dataset.id
-        return "Vector_index_" + dataset_id.replace("-", "_") + '_Node'
+        return Dataset.gen_collection_name_by_id(dataset_id)
 
     def to_index_struct(self) -> dict:
         return {

+ 4 - 0
api/models/dataset.py

@@ -116,6 +116,10 @@ class Dataset(db.Model):
         }
         return self.retrieval_model if self.retrieval_model else default_retrieval_model
 
+    @staticmethod
+    def gen_collection_name_by_id(dataset_id: str) -> str:
+        normalized_dataset_id = dataset_id.replace("-", "_")
+        return f'Vector_index_{normalized_dataset_id}_Node'
 
 class DatasetProcessRule(db.Model):
     __tablename__ = 'dataset_process_rules'

+ 1 - 1
api/services/dataset_service.py

@@ -1244,7 +1244,7 @@ class DatasetCollectionBindingService:
             dataset_collection_binding = DatasetCollectionBinding(
                 provider_name=provider_name,
                 model_name=model_name,
-                collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node',
+                collection_name=Dataset.gen_collection_name_by_id(str(uuid.uuid4())),
                 type=collection_type
             )
             db.session.add(dataset_collection_binding)