embed_service.py

import time
import os
from llm_model.embed import embed
from app.common.res import res_success, res_error
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores.chroma import Chroma

CHROMA_PATH = os.getenv('CHROMA_PATH', 'chroma')
COLLECTION_NAME = os.getenv('COLLECTION_NAME', 'siwei_ai')
TEXT_EMBEDDING_MODEL = os.getenv('TEXT_EMBEDDING_MODEL', 'nomic-embed-text')


# Parse an uploaded file into vectors and store them
def parse_file_to_embed(file):
    start_time = time.time()
    embedded = embed(file)
    end_time = time.time()
    print("Time taken for embedding: ", end_time - start_time)
    if embedded:
        return res_success(msg="File embedded successfully")
    else:
        return res_error(msg="File embedding failed")


# Delete the vector collection
def delete_embed():
    db = get_vector_db()
    db.delete_collection()
    return res_success(msg="Collection deleted successfully")


# Build a Chroma vector store backed by Ollama embeddings
def get_vector_db():
    embedding = OllamaEmbeddings(
        model=TEXT_EMBEDDING_MODEL, show_progress=True, num_gpu=0, num_thread=4)
    db = Chroma(
        collection_name=COLLECTION_NAME,
        persist_directory=CHROMA_PATH,
        embedding_function=embedding
    )
    return db
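

# A minimal usage sketch, kept as comments so the module stays valid Python.
# Assumption: these service functions are wired into a Flask-style app; the
# route paths, view names, and `request.files['file']` access below are
# hypothetical illustrations, not part of the original service.
#
#     from flask import Flask, request
#
#     app = Flask(__name__)
#
#     @app.route('/embed', methods=['POST'])
#     def embed_file():
#         # Pass the uploaded file through to the embedding service
#         return parse_file_to_embed(request.files['file'])
#
#     @app.route('/embed', methods=['DELETE'])
#     def clear_embeddings():
#         # Drop the whole Chroma collection
#         return delete_embed()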