| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278 | 
							- import json
 
- from copy import deepcopy
 
- from datetime import datetime, timezone
 
- from typing import Any, Optional, Union
 
- import httpx
 
- import validators
 
- from constants import HIDDEN_VALUE
 
- # from tasks.external_document_indexing_task import external_document_indexing_task
 
- from core.helper import ssrf_proxy
 
- from extensions.ext_database import db
 
- from models.dataset import (
 
-     Dataset,
 
-     ExternalKnowledgeApis,
 
-     ExternalKnowledgeBindings,
 
- )
 
- from services.entities.external_knowledge_entities.external_knowledge_entities import (
 
-     Authorization,
 
-     ExternalKnowledgeApiSetting,
 
- )
 
- from services.errors.dataset import DatasetNameDuplicateError
 
class ExternalDatasetService:
    """Service helpers for external knowledge APIs and the datasets bound to them."""

    @staticmethod
    def get_external_knowledge_apis(page, per_page, tenant_id, search=None) -> tuple[list[ExternalKnowledgeApis], int]:
        """Return one page of a tenant's external knowledge APIs, newest first.

        Args:
            page: Page number forwarded to ``paginate``.
            per_page: Page size forwarded to ``paginate`` (``max_per_page=100``).
            tenant_id: Only rows with this ``tenant_id`` are considered.
            search: Optional substring; when truthy, rows are filtered with
                ``name ILIKE '%<search>%'``.

        Returns:
            ``(items, total)`` taken from the pagination result.
        """
        query = ExternalKnowledgeApis.query.filter(ExternalKnowledgeApis.tenant_id == tenant_id).order_by(
            ExternalKnowledgeApis.created_at.desc()
        )
        if search:
            query = query.filter(ExternalKnowledgeApis.name.ilike(f"%{search}%"))

        # error_out=False: out-of-range pages do not abort the request.
        external_knowledge_apis = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False)
        return external_knowledge_apis.items, external_knowledge_apis.total

    @classmethod
    def validate_api_list(cls, api_settings: dict):
        """Check that ``api_settings`` carries a non-empty endpoint and api_key.

        Raises:
            ValueError: If ``api_settings`` is empty, or if ``endpoint`` or
                ``api_key`` is missing or falsy.
        """
        if not api_settings:
            raise ValueError("api list is empty")
        # ``.get`` treats "key missing" and "value empty" alike, so both cases
        # raise ValueError (a missing key must not surface as KeyError, and an
        # empty value must not pass validation).
        if not api_settings.get("endpoint"):
            raise ValueError("endpoint is required")
        if not api_settings.get("api_key"):
            raise ValueError("api_key is required")

    @staticmethod
    def create_external_knowledge_api(tenant_id: str, user_id: str, args: dict) -> ExternalKnowledgeApis:
        """Validate the supplied settings, then persist a new external knowledge API.

        Args:
            tenant_id: Owner tenant of the new record.
            user_id: Stored as both ``created_by`` and ``updated_by``.
            args: Reads ``name``, ``description`` (default ``""``) and ``settings``.

        Returns:
            The committed ``ExternalKnowledgeApis`` row.

        Raises:
            ValueError: Propagated from ``check_endpoint_and_api_key``.
        """
        settings = args.get("settings")
        ExternalDatasetService.check_endpoint_and_api_key(settings)

        external_knowledge_api = ExternalKnowledgeApis(
            tenant_id=tenant_id,
            created_by=user_id,
            updated_by=user_id,
            name=args.get("name"),
            description=args.get("description", ""),
            settings=json.dumps(settings, ensure_ascii=False),
        )
        db.session.add(external_knowledge_api)
        db.session.commit()
        return external_knowledge_api

    @staticmethod
    def check_endpoint_and_api_key(settings: dict):
        """Validate ``settings`` and probe ``<endpoint>/retrieval`` with the api_key.

        Args:
            settings: Mapping expected to contain ``endpoint`` and ``api_key``.
                ``None`` or an empty mapping is treated as having neither.

        Raises:
            ValueError: If ``endpoint`` or ``api_key`` is missing/empty, the
                resulting URL is rejected by ``validators.url``, the request
                raises, or the response status is 502, 404 or 403.
        """
        # Callers pass ``args.get("settings")``, which may be None.
        settings = settings or {}

        base_endpoint = settings.get("endpoint")
        if not base_endpoint:
            raise ValueError("endpoint is required")
        api_key = settings.get("api_key")
        if not api_key:
            raise ValueError("api_key is required")

        endpoint = f"{base_endpoint}/retrieval"
        if not validators.url(endpoint, simple_host=True):
            raise ValueError(f"invalid endpoint: {endpoint}")

        try:
            # The endpoint is user-supplied, so send the probe through the same
            # ssrf_proxy helper that process_external_api uses for outbound calls
            # instead of calling httpx directly.
            response = ssrf_proxy.post(url=endpoint, headers={"Authorization": f"Bearer {api_key}"})
        except Exception as e:
            # Keep the ValueError contract but preserve the root cause for debugging.
            raise ValueError(f"failed to connect to the endpoint: {endpoint}") from e

        if response.status_code == 502:
            raise ValueError(f"Bad Gateway: failed to connect to the endpoint: {endpoint}")
        if response.status_code == 404:
            raise ValueError(f"Not Found: failed to connect to the endpoint: {endpoint}")
        if response.status_code == 403:
            # NOTE(review): this message echoes the api_key back to the caller;
            # confirm it never reaches logs or other users before keeping it.
            raise ValueError(f"Forbidden: Authorization failed with api_key: {api_key}")

    @staticmethod
    def get_external_knowledge_api(external_knowledge_api_id: str) -> Optional[ExternalKnowledgeApis]:
        """Return the external knowledge API with the given id, or ``None`` if no row matches."""
        return ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id).first()

    @staticmethod
    def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis:
        """Overwrite name, description and settings of a tenant's external knowledge API.

        When the incoming ``settings["api_key"]`` equals ``HIDDEN_VALUE`` the
        key already stored on the record is kept instead (``args["settings"]``
        is updated in place).

        Raises:
            ValueError: If no record matches ``external_knowledge_api_id`` and ``tenant_id``.
        """
        external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
            id=external_knowledge_api_id, tenant_id=tenant_id
        ).first()
        if external_knowledge_api is None:
            raise ValueError("api template not found")

        settings = args.get("settings")
        if settings and settings.get("api_key") == HIDDEN_VALUE:
            settings["api_key"] = external_knowledge_api.settings_dict.get("api_key")

        external_knowledge_api.name = args.get("name")
        external_knowledge_api.description = args.get("description", "")
        external_knowledge_api.settings = json.dumps(settings, ensure_ascii=False)
        external_knowledge_api.updated_by = user_id
        # Current UTC time stored as a naive datetime.
        external_knowledge_api.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
        db.session.commit()
        return external_knowledge_api

    @staticmethod
    def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str):
        """Delete a tenant's external knowledge API.

        Raises:
            ValueError: If no record matches ``external_knowledge_api_id`` and ``tenant_id``.
        """
        external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
            id=external_knowledge_api_id, tenant_id=tenant_id
        ).first()
        if external_knowledge_api is None:
            raise ValueError("api template not found")

        db.session.delete(external_knowledge_api)
        db.session.commit()

    @staticmethod
    def external_knowledge_api_use_check(external_knowledge_api_id: str) -> tuple[bool, int]:
        """Report whether any binding references the API.

        Returns:
            ``(True, count)`` when at least one binding exists, else ``(False, 0)``.
        """
        count = ExternalKnowledgeBindings.query.filter_by(external_knowledge_api_id=external_knowledge_api_id).count()
        if count > 0:
            return True, count
        return False, 0

    @staticmethod
    def get_external_knowledge_binding_with_dataset_id(tenant_id: str, dataset_id: str) -> ExternalKnowledgeBindings:
        """Return the binding for ``dataset_id`` within ``tenant_id``.

        Raises:
            ValueError: If no such binding exists.
        """
        external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(
            dataset_id=dataset_id, tenant_id=tenant_id
        ).first()
        if not external_knowledge_binding:
            raise ValueError("external knowledge binding not found")
        return external_knowledge_binding

    @staticmethod
    def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict):
        """Ensure every required ``document_process_setting`` parameter is supplied.

        Args:
            tenant_id: Tenant that must own the API record.
            external_knowledge_api_id: Id of the API whose stored settings are checked.
            process_parameter: Caller-supplied values keyed by parameter name.

        Raises:
            ValueError: If the API record is not found, or a parameter marked
                ``required`` has no truthy value in ``process_parameter``.
        """
        external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
            id=external_knowledge_api_id, tenant_id=tenant_id
        ).first()
        if external_knowledge_api is None:
            raise ValueError("api template not found")

        settings = json.loads(external_knowledge_api.settings)
        # The stored JSON may be one settings object or a list of them. Iterating
        # a dict directly would yield its string keys and break on ``.get``, so
        # normalise to a list of candidate objects first.
        entries = settings if isinstance(settings, list) else [settings]
        for setting in entries:
            if not isinstance(setting, dict):
                continue
            for parameter in setting.get("document_process_setting") or []:
                name = parameter.get("name")
                if parameter.get("required", False) and not process_parameter.get(name):
                    raise ValueError(f"{name} is required")

    @staticmethod
    def process_external_api(
        settings: ExternalKnowledgeApiSetting, files: Union[None, dict[str, Any]]
    ) -> httpx.Response:
        """Send the HTTP request described by ``settings`` through ``ssrf_proxy``.

        The function named by ``settings.request_method`` is looked up on
        ``ssrf_proxy`` and called with ``settings.url``, ``settings.headers``,
        the JSON-encoded ``settings.params`` as ``data``, ``files`` and
        ``follow_redirects=True``.
        """
        send = getattr(ssrf_proxy, settings.request_method)
        return send(
            data=json.dumps(settings.params),
            files=files,
            url=settings.url,
            headers=settings.headers,
            follow_redirects=True,
        )

    @staticmethod
    def assembling_headers(authorization: Authorization, headers: Optional[dict] = None) -> dict[str, Any]:
        """Return a copy of ``headers`` with the authorization header added.

        Neither argument is mutated; both are deep-copied first. A header is
        only added when ``authorization.type == "api-key"`` and the config type
        is ``bearer``, ``basic`` or ``custom``. The header name defaults to
        ``Authorization`` when the config does not name one.

        Raises:
            ValueError: For type ``api-key`` when the config or its ``api_key`` is ``None``.
        """
        authorization = deepcopy(authorization)
        headers = deepcopy(headers) if headers else {}

        if authorization.type != "api-key":
            return headers

        config = authorization.config
        if config is None:
            raise ValueError("authorization config is required")
        if config.api_key is None:
            raise ValueError("api_key is required")
        if not config.header:
            config.header = "Authorization"

        # Scheme prefix per config type; unknown types add no header.
        prefixes = {"bearer": "Bearer ", "basic": "Basic ", "custom": ""}
        if config.type in prefixes:
            headers[config.header] = f"{prefixes[config.type]}{config.api_key}"
        return headers

    @staticmethod
    def get_external_knowledge_api_settings(settings: dict) -> ExternalKnowledgeApiSetting:
        """Build an ``ExternalKnowledgeApiSetting`` from a plain dict via ``parse_obj``."""
        return ExternalKnowledgeApiSetting.parse_obj(settings)

    @staticmethod
    def create_external_dataset(tenant_id: str, user_id: str, args: dict) -> Dataset:
        """Create an ``external`` dataset and bind it to an external knowledge API.

        Args:
            tenant_id: Owner tenant for the dataset and the binding.
            user_id: Stored as ``created_by`` on both records.
            args: Reads ``name``, ``description`` (default ``""``),
                ``external_retrieval_model``, ``external_knowledge_api_id``
                and ``external_knowledge_id``.

        Returns:
            The committed ``Dataset``.

        Raises:
            DatasetNameDuplicateError: If the tenant already has a dataset with this name.
            ValueError: If the referenced API does not exist for the tenant.
        """
        name = args.get("name")
        external_knowledge_api_id = args.get("external_knowledge_api_id")

        # check if dataset name already exists
        if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first():
            raise DatasetNameDuplicateError(f"Dataset with name {name} already exists.")

        external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
            id=external_knowledge_api_id, tenant_id=tenant_id
        ).first()
        if external_knowledge_api is None:
            raise ValueError("api template not found")

        dataset = Dataset(
            tenant_id=tenant_id,
            name=name,
            description=args.get("description", ""),
            provider="external",
            retrieval_model=args.get("external_retrieval_model"),
            created_by=user_id,
        )
        db.session.add(dataset)
        # Flush before building the binding, which reads ``dataset.id``.
        db.session.flush()

        external_knowledge_binding = ExternalKnowledgeBindings(
            tenant_id=tenant_id,
            dataset_id=dataset.id,
            external_knowledge_api_id=external_knowledge_api_id,
            external_knowledge_id=args.get("external_knowledge_id"),
            created_by=user_id,
        )
        db.session.add(external_knowledge_binding)
        db.session.commit()
        return dataset

    @staticmethod
    def fetch_external_knowledge_retrieval(
        tenant_id: str, dataset_id: str, query: str, external_retrieval_parameters: dict
    ) -> list:
        """Query the external knowledge API bound to a dataset.

        Args:
            tenant_id: Tenant that must own the binding.
            dataset_id: Dataset whose binding selects the API and knowledge id.
            query: Text sent as ``query`` in the request body.
            external_retrieval_parameters: Reads ``top_k``,
                ``score_threshold_enabled`` and ``score_threshold``.

        Returns:
            The ``records`` list from the JSON response on HTTP 200
            (``[]`` if the key is absent); ``[]`` for any other status.

        Raises:
            ValueError: If the binding or the API record cannot be found.
        """
        external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(
            dataset_id=dataset_id, tenant_id=tenant_id
        ).first()
        if not external_knowledge_binding:
            raise ValueError("external knowledge binding not found")

        external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
            id=external_knowledge_binding.external_knowledge_api_id
        ).first()
        if not external_knowledge_api:
            raise ValueError("external api template not found")

        settings = json.loads(external_knowledge_api.settings)
        headers = {"Content-Type": "application/json"}
        if settings.get("api_key"):
            headers["Authorization"] = f"Bearer {settings.get('api_key')}"

        # The threshold only applies when explicitly enabled; otherwise send 0.0.
        score_threshold_enabled = external_retrieval_parameters.get("score_threshold_enabled") or False
        score_threshold = external_retrieval_parameters.get("score_threshold", 0.0) if score_threshold_enabled else 0.0

        request_params = {
            "retrieval_setting": {
                "top_k": external_retrieval_parameters.get("top_k"),
                "score_threshold": score_threshold,
            },
            "query": query,
            "knowledge_id": external_knowledge_binding.external_knowledge_id,
        }
        external_knowledge_api_setting = {
            "url": f"{settings.get('endpoint')}/retrieval",
            "request_method": "post",
            "headers": headers,
            "params": request_params,
        }

        response = ExternalDatasetService.process_external_api(
            ExternalKnowledgeApiSetting(**external_knowledge_api_setting), None
        )
        if response.status_code == 200:
            return response.json().get("records", [])
        return []
 
 
  |