Browse Source

chore: refine python dependency list and check dependencies in order (#9061)

Bowen Liang 6 months ago
parent
commit
896998ef3f

+ 4 - 1
.github/workflows/api-tests.yml

@@ -39,7 +39,7 @@ jobs:
             api/pyproject.toml
             api/poetry.lock
 
-      - name: Poetry check
+      - name: Check Poetry lockfile
         run: |
           poetry check -C api --lock
           poetry show -C api
@@ -47,6 +47,9 @@ jobs:
       - name: Install dependencies
         run: poetry install -C api --with dev
 
+      - name: Check dependencies in pyproject.toml
+        run: poetry run -C api bash dev/pytest/pytest_artifacts.sh
+
       - name: Run Unit tests
         run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh
 

+ 1 - 1
api/poetry.lock

@@ -10595,4 +10595,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "d29d0c4ce384ef94fe65f0a2a145898bd1a117d4fd59c217d15bbb8993f4ce4e"
+content-hash = "fd183812f910faf4e840267501c571db5d758ad6eb328d106ba6f79a0322a555"

+ 39 - 34
api/pyproject.toml

@@ -104,25 +104,23 @@ name = "dify-api"
 package-mode = false
 
 ############################################################
-# Main dependencies
+# [ Main ] dependency group
 ############################################################
 
 [tool.poetry.dependencies]
 anthropic = "~0.23.1"
 authlib = "1.3.1"
+azure-ai-inference = "~1.0.0b3"
+azure-ai-ml = "~1.20.0"
 azure-identity = "1.16.1"
 azure-storage-blob = "12.13.0"
 beautifulsoup4 = "4.12.2"
 boto3 = "1.35.17"
-sagemaker = "2.231.0"
 bs4 = "~0.0.1"
 cachetools = "~5.3.0"
 celery = "~5.3.6"
 chardet = "~5.1.0"
 cohere = "~5.2.4"
-cos-python-sdk-v5 = "1.9.30"
-esdk-obs-python = "3.24.6.1"
-bce-python-sdk = "~0.9.23"
 dashscope = { version = "~1.17.0", extras = ["tokenizer"] }
 flask = "~3.0.1"
 flask-compress = "~1.14"
@@ -130,7 +128,7 @@ flask-cors = "~4.0.0"
 flask-login = "~0.6.3"
 flask-migrate = "~4.0.5"
 flask-restful = "~0.3.10"
-Flask-SQLAlchemy = "~3.1.1"
+flask-sqlalchemy = "~3.1.1"
 gevent = "~23.9.1"
 gmpy2 = "~2.2.1"
 google-ai-generativelanguage = "0.6.9"
@@ -139,22 +137,22 @@ google-api-python-client = "2.90.0"
 google-auth = "2.29.0"
 google-auth-httplib2 = "0.2.0"
 google-cloud-aiplatform = "1.49.0"
-google-cloud-storage = "2.16.0"
 google-generativeai = "0.8.1"
 googleapis-common-protos = "1.63.0"
 gunicorn = "~22.0.0"
 httpx = { version = "~0.27.0", extras = ["socks"] }
 huggingface-hub = "~0.16.4"
 jieba = "0.42.1"
-langfuse = "^2.48.0"
-langsmith = "^0.1.77"
+langfuse = "~2.51.3"
+langsmith = "~0.1.77"
 mailchimp-transactional = "~1.0.50"
 markdown = "~3.5.1"
-novita-client = "^0.5.7"
+nomic = "~3.1.2"
+novita-client = "~0.5.7"
 numpy = "~1.26.4"
+oci = "~2.135.1"
 openai = "~1.29.0"
 openpyxl = "~3.1.5"
-oss2 = "2.18.5"
 pandas = { version = "~2.2.2", extras = ["performance", "excel"] }
 psycopg2-binary = "~2.9.6"
 pycryptodome = "3.19.1"
@@ -171,7 +169,8 @@ readabilipy = "0.2.0"
 redis = { version = "~5.0.3", extras = ["hiredis"] }
 replicate = "~0.22.0"
 resend = "~0.7.0"
-scikit-learn = "^1.5.1"
+sagemaker = "2.231.0"
+scikit-learn = "~1.5.1"
 sentry-sdk = { version = "~1.44.1", extras = ["flask"] }
 sqlalchemy = "~2.0.29"
 tencentcloud-sdk-python-hunyuan = "~3.0.1158"
@@ -179,6 +178,8 @@ tiktoken = "~0.7.0"
 tokenizers = "~0.15.0"
 transformers = "~4.35.0"
 unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
+validators = "0.21.0"
+volcengine-python-sdk = {extras = ["ark"], version = "~1.0.98"}
 websocket-client = "~1.7.0"
 werkzeug = "~3.0.1"
 xinference-client = "0.15.2"
@@ -187,32 +188,26 @@ zhipuai = "1.0.7"
 # Before adding a new dependency, consider placing it in alphabetical order (a-z) and in a suitable group.
 
 ############################################################
+# [ Indirect ] dependency group
 # Related transitive dependencies with pinned versions,
 # required by the main implementations
 ############################################################
-azure-ai-ml = "^1.19.0"
-azure-ai-inference = "^1.0.0b3"
-volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"}
-oci = "^2.133.0"
-tos = "^2.7.1"
-nomic = "^3.1.2"
-validators = "0.21.0"
-[tool.poetry.group.indriect.dependencies]
+[tool.poetry.group.indirect.dependencies]
 kaleido = "0.2.1"
 rank-bm25 = "~0.2.2"
 safetensors = "~0.4.3"
 
 ############################################################
-# Tool dependencies required by tool implementations
+# [ Tools ] dependency group
 ############################################################
-
-[tool.poetry.group.tool.dependencies]
+[tool.poetry.group.tools.dependencies]
 arxiv = "2.1.0"
 cloudscraper = "1.2.71"
-matplotlib = "~3.8.2"
-newspaper3k = "0.2.8"
 duckduckgo-search = "~6.3.0"
 jsonpath-ng = "1.6.1"
+matplotlib = "~3.8.2"
+newspaper3k = "0.2.8"
+nltk = "3.8.1"
 numexpr = "~2.9.0"
 opensearch-py = "2.4.0"
 qrcode = "~7.4.2"
@@ -220,11 +215,23 @@ twilio = "~9.0.4"
 vanna = { version = "0.5.5", extras = ["postgres", "mysql", "clickhouse", "duckdb"] }
 wikipedia = "1.4.0"
 yfinance = "~0.2.40"
-nltk = "3.8.1"
+
 ############################################################
-# VDB dependencies required by vector store clients
+# [ Storage ] dependency group
+# Required for storage clients
 ############################################################
+[tool.poetry.group.storage.dependencies]
+bce-python-sdk = "~0.9.23"
+cos-python-sdk-v5 = "1.9.30"
+esdk-obs-python = "3.24.6.1"
+google-cloud-storage = "2.16.0"
+oss2 = "2.18.5"
+tos = "~2.7.1"
 
+############################################################
+# [ VDB ] dependency group
+# Required by vector store clients
+############################################################
 [tool.poetry.group.vdb.dependencies]
 alibabacloud_gpdb20160503 = "~3.8.0"
 alibabacloud_tea_openapi = "~0.3.9"
@@ -235,18 +242,17 @@ oracledb = "~2.2.1"
 pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
 pgvector = "0.2.5"
 pymilvus = "~2.4.4"
+qdrant-client = "1.7.3"
 tcvectordb = "1.3.2"
 tidb-vector = "0.0.9"
-qdrant-client = "1.7.3"
 weaviate-client = "~3.21.0"
 
 ############################################################
-# Dev dependencies for running tests
+# [ Dev ] dependency group
+# Required for development and running tests
 ############################################################
-
 [tool.poetry.group.dev]
 optional = true
-
 [tool.poetry.group.dev.dependencies]
 coverage = "~7.2.4"
 pytest = "~8.3.2"
@@ -255,12 +261,11 @@ pytest-env = "~1.1.3"
 pytest-mock = "~3.14.0"
 
 ############################################################
-# Lint dependencies for code style linting
+# [ Lint ] dependency group
+# Required for code style linting
 ############################################################
-
 [tool.poetry.group.lint]
 optional = true
-
 [tool.poetry.group.lint.dependencies]
 dotenv-linter = "~0.5.0"
 ruff = "~0.6.9"

+ 0 - 0
api/tests/artifact_tests/dependencies/__init__.py


+ 61 - 0
api/tests/artifact_tests/dependencies/test_dependencies_sorted.py

@@ -0,0 +1,61 @@
+from typing import Any
+
+import toml
+
+ALL_DEPENDENCY_GROUP_NAMES = [
+    # default main group
+    "",
+    # required groups
+    "indirect",
+    "storage",
+    "tools",
+    "vdb",
+    # optional groups
+    "dev",
+    "lint",
+]
+
+
+def load_api_poetry_configs() -> dict[str, Any]:
+    pyproject_toml = toml.load("api/pyproject.toml")
+    return pyproject_toml.get("tool").get("poetry")
+
+
+def load_dependency_groups() -> dict[str, dict[str, dict[str, Any]]]:
+    poetry_configs = load_api_poetry_configs()
+    group_name_to_dependencies = {
+        group_name: (poetry_configs.get("group").get(group_name) if group_name else poetry_configs).get("dependencies")
+        for group_name in ALL_DEPENDENCY_GROUP_NAMES
+    }
+    return group_name_to_dependencies
+
+
+def test_group_dependencies_sorted():
+    for group_name, dependencies in load_dependency_groups().items():
+        dependency_names = list(dependencies.keys())
+        expected_dependency_names = sorted(set(dependency_names))
+        section = f"tool.poetry.group.{group_name}.dependencies" if group_name else "tool.poetry.dependencies"
+        assert expected_dependency_names == dependency_names, (
+            f"Dependencies in group {group_name} are not sorted. "
+            f"Check and fix [{section}] section in pyproject.toml file"
+        )
+
+
+def test_group_dependencies_version_operator():
+    for group_name, dependencies in load_dependency_groups().items():
+        for dependency_name, specification in dependencies.items():
+            version_spec = specification if isinstance(specification, str) else specification.get("version")
+            assert not version_spec.startswith("^"), (
+                f"'^' is not allowed in dependency version," f" but found in '{dependency_name} = {version_spec}'"
+            )
+
+
+def test_duplicated_dependency_crossing_groups():
+    all_dependency_names: list[str] = []
+    for dependencies in load_dependency_groups().values():
+        dependency_names = list(dependencies.keys())
+        all_dependency_names.extend(dependency_names)
+    expected_all_dependency_names = set(all_dependency_names)
+    assert sorted(expected_all_dependency_names) == sorted(
+        all_dependency_names
+    ), "Duplicated dependencies crossing groups are found"

+ 4 - 0
dev/pytest/pytest_artifacts.sh

@@ -0,0 +1,4 @@
+#!/bin/bash
+set -x
+
+pytest api/tests/artifact_tests/

+ 3 - 0
dev/sync-poetry

@@ -11,5 +11,8 @@ poetry check -C api --lock
 if [ $? -ne 0 ]; then
     # update poetry.lock
     # refreshing lockfile only without updating locked versions
+    echo "poetry.lock is outdated, refreshing without updating locked versions ..."
     poetry lock -C api --no-update
+else
+  echo "poetry.lock is ready."
 fi