Browse Source

test: add unit tests for vector stores of Milvus, Qdrant and Weaviate (#3688)

Bowen Liang 11 months ago
parent
commit
9cec8c1750

+ 26 - 2
.github/workflows/api-tests.yml

@@ -37,6 +37,27 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Set up Weaviate
+        uses: hoverkraft-tech/compose-action@v2.0.0
+        with:
+          compose-file: docker/docker-compose.middleware.yaml
+          services: weaviate
+
+      - name: Set up Qdrant
+        uses: hoverkraft-tech/compose-action@v2.0.0
+        with:
+          compose-file: docker/docker-compose.qdrant.yaml
+          services: qdrant
+
+      - name: Set up Milvus
+        uses: hoverkraft-tech/compose-action@v2.0.0
+        with:
+          compose-file: docker/docker-compose.milvus.yaml
+          services: |
+            etcd
+            minio
+            milvus-standalone
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
@@ -49,6 +70,9 @@ jobs:
       - name: Install dependencies
         run: pip install -r ./api/requirements.txt -r ./api/requirements-dev.txt
 
+      - name: Run Unit tests
+        run: dev/pytest/pytest_unit_tests.sh
+
       - name: Run ModelRuntime
         run: dev/pytest/pytest_model_runtime.sh
 
@@ -58,5 +82,5 @@ jobs:
       - name: Run Workflow
         run: dev/pytest/pytest_workflow.sh
 
-      - name: Run Unit tests
-        run: dev/pytest/pytest_unit_tests.sh
+      - name: Run Vector Stores
+        run: dev/pytest/pytest_vdb.sh

+ 1 - 1
api/core/rag/datasource/vdb/milvus/milvus_vector.py

@@ -250,7 +250,7 @@ class MilvusVector(BaseVector):
 
                 # Create the collection
                 collection_name = self._collection_name
-                self._client.create_collection(collection_name=collection_name,
+                self._client.create_collection_with_schema(collection_name=collection_name,
                                                            schema=schema, index_param=index_params,
                                                            consistency_level=self._consistency_level)
             redis_client.set(collection_exist_cache_key, 1, ex=3600)

+ 1 - 1
api/requirements.txt

@@ -55,7 +55,7 @@ xinference-client==0.9.4
 safetensors~=0.4.3
 zhipuai==1.0.7
 werkzeug~=3.0.1
-pymilvus~=2.3.7
+pymilvus==2.3.1
 qdrant-client==1.7.3
 cohere~=5.2.4
 pyyaml~=6.0.1

+ 0 - 0
api/tests/unit_tests/core/rag/datasource/vdb/milvus/__init__.py → api/tests/integration_tests/vdb/__init__.py


+ 0 - 0
api/tests/integration_tests/vdb/milvus/__init__.py


+ 38 - 0
api/tests/integration_tests/vdb/milvus/test_milvus.py

@@ -0,0 +1,38 @@
+import uuid
+
+from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector
+from models.dataset import Dataset
+from tests.integration_tests.vdb.test_vector_store import (
+    get_sample_document,
+    get_sample_embedding,
+    get_sample_query_vector,
+    setup_mock_redis,
+)
+
+
+def test_milvus_vector(setup_mock_redis) -> None:
+    dataset_id = str(uuid.uuid4())
+    vector = MilvusVector(
+        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
+        config=MilvusConfig(
+            host='localhost',
+            port=19530,
+            user='root',
+            password='Milvus',
+        )
+    )
+
+    # create vector
+    vector.create(
+        texts=[get_sample_document(dataset_id)],
+        embeddings=[get_sample_embedding()],
+    )
+
+    # search by vector
+    hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
+    assert len(hits_by_vector) >= 1
+
+    # milvus does not support full text searching yet in < 2.3.x
+
+    # delete vector
+    vector.delete()

+ 0 - 0
api/tests/integration_tests/vdb/qdrant/__init__.py


+ 40 - 0
api/tests/integration_tests/vdb/qdrant/test_qdrant.py

@@ -0,0 +1,40 @@
+import uuid
+
+from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
+from models.dataset import Dataset
+from tests.integration_tests.vdb.test_vector_store import (
+    get_sample_document,
+    get_sample_embedding,
+    get_sample_query_vector,
+    get_sample_text,
+    setup_mock_redis,
+)
+
+
+def test_qdrant_vector(setup_mock_redis)-> None:
+    dataset_id = str(uuid.uuid4())
+    vector = QdrantVector(
+        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
+        group_id=dataset_id,
+        config=QdrantConfig(
+            endpoint='http://localhost:6333',
+            api_key='difyai123456',
+        )
+    )
+
+    # create vector
+    vector.create(
+        texts=[get_sample_document(dataset_id)],
+        embeddings=[get_sample_embedding()],
+    )
+
+    # search by vector
+    hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
+    assert len(hits_by_vector) >= 1
+
+    # search by full text
+    hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
+    assert len(hits_by_full_text) >= 1
+
+    # delete vector
+    vector.delete()

+ 46 - 0
api/tests/integration_tests/vdb/test_vector_store.py

@@ -0,0 +1,46 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from core.rag.models.document import Document
+from extensions import ext_redis
+
+
+def get_sample_text() -> str:
+    return 'test_text'
+
+
+def get_sample_embedding() -> list[float]:
+    return [1.1, 2.2, 3.3]
+
+
+def get_sample_query_vector() -> list[float]:
+    return get_sample_embedding()
+
+
+def get_sample_document(sample_dataset_id: str) -> Document:
+    doc = Document(
+        page_content=get_sample_text(),
+        metadata={
+            "doc_id": sample_dataset_id,
+            "doc_hash": sample_dataset_id,
+            "document_id": sample_dataset_id,
+            "dataset_id": sample_dataset_id,
+        }
+    )
+    return doc
+
+
+@pytest.fixture
+def setup_mock_redis() -> None:
+    # get
+    ext_redis.redis_client.get = MagicMock(return_value=None)
+
+    # set
+    ext_redis.redis_client.set = MagicMock(return_value=None)
+
+    # lock
+    mock_redis_lock = MagicMock()
+    mock_redis_lock.__enter__ = MagicMock()
+    mock_redis_lock.__exit__ = MagicMock()
+    ext_redis.redis_client.lock = mock_redis_lock

+ 0 - 0
api/tests/integration_tests/vdb/weaviate/__init__.py


+ 41 - 0
api/tests/integration_tests/vdb/weaviate/test_weaviate.py

@@ -0,0 +1,41 @@
+import uuid
+
+from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector
+from models.dataset import Dataset
+from tests.integration_tests.vdb.test_vector_store import (
+    get_sample_document,
+    get_sample_embedding,
+    get_sample_query_vector,
+    get_sample_text,
+    setup_mock_redis,
+)
+
+
+def test_weaviate_vector(setup_mock_redis) -> None:
+    attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
+    dataset_id = str(uuid.uuid4())
+    vector = WeaviateVector(
+        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
+        config=WeaviateConfig(
+            endpoint='http://localhost:8080',
+            api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih',
+        ),
+        attributes=attributes
+    )
+
+    # create vector
+    vector.create(
+        texts=[get_sample_document(dataset_id)],
+        embeddings=[get_sample_embedding()],
+    )
+
+    # search by vector
+    hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
+    assert len(hits_by_vector) >= 1
+
+    # search by full text
+    hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
+    assert len(hits_by_full_text) >= 1
+
+    # delete vector
+    vector.delete()

+ 1 - 1
api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py

@@ -18,7 +18,7 @@ def test_default_value():
         with pytest.raises(ValidationError) as e:
             MilvusConfig(**config)
         assert e.value.errors()[1]['msg'] == f'config MILVUS_{key.upper()} is required'
-    
+
     config = MilvusConfig(**valid_config)
     assert config.secure is False
     assert config.database == 'default'

+ 4 - 0
dev/pytest/pytest_vdb.sh

@@ -0,0 +1,4 @@
+#!/bin/bash
+set -x
+
+pytest api/tests/integration_tests/vdb/

+ 1 - 1
docker/milvus-standalone-docker-compose.yml → docker/docker-compose.milvus.yaml

@@ -36,7 +36,7 @@ services:
       timeout: 20s
       retries: 3
 
-  standalone:
+  milvus-standalone:
     container_name: milvus-standalone
     image: milvusdb/milvus:v2.3.1
     command: ["milvus", "run", "standalone"]

+ 12 - 0
docker/docker-compose.qdrant.yaml

@@ -0,0 +1,12 @@
+version: '3'
+services:
+  # Qdrant vector store.
+  qdrant:
+    image: langgenius/qdrant:v1.7.3
+    restart: always
+    volumes:
+      - ./volumes/qdrant:/qdrant/storage
+    environment:
+      QDRANT_API_KEY: 'difyai123456'
+    ports:
+      - "6333:6333"