knowledge_service.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. import boto3
  2. from configs import dify_config
  3. class ExternalDatasetTestService:
  4. # this service is only for internal testing
  5. @staticmethod
  6. def knowledge_retrieval(retrieval_setting: dict, query: str, knowledge_id: str):
  7. # get bedrock client
  8. client = boto3.client(
  9. "bedrock-agent-runtime",
  10. aws_secret_access_key=dify_config.AWS_SECRET_ACCESS_KEY,
  11. aws_access_key_id=dify_config.AWS_ACCESS_KEY_ID,
  12. # example: us-east-1
  13. region_name="us-east-1",
  14. )
  15. # fetch external knowledge retrieval
  16. response = client.retrieve(
  17. knowledgeBaseId=knowledge_id,
  18. retrievalConfiguration={
  19. "vectorSearchConfiguration": {
  20. "numberOfResults": retrieval_setting.get("top_k"),
  21. "overrideSearchType": "HYBRID",
  22. }
  23. },
  24. retrievalQuery={"text": query},
  25. )
  26. # parse response
  27. results = []
  28. if response.get("ResponseMetadata") and response.get("ResponseMetadata").get("HTTPStatusCode") == 200:
  29. if response.get("retrievalResults"):
  30. retrieval_results = response.get("retrievalResults")
  31. for retrieval_result in retrieval_results:
  32. # filter out results with score less than threshold
  33. if retrieval_result.get("score") < retrieval_setting.get("score_threshold", 0.0):
  34. continue
  35. result = {
  36. "metadata": retrieval_result.get("metadata"),
  37. "score": retrieval_result.get("score"),
  38. "title": retrieval_result.get("metadata").get("x-amz-bedrock-kb-source-uri"),
  39. "content": retrieval_result.get("content").get("text"),
  40. }
  41. results.append(result)
  42. return {"records": results}