index_builder.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. from langchain.callbacks import CallbackManager
  2. from llama_index import ServiceContext, PromptHelper, LLMPredictor
  3. from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
  4. from core.embedding.openai_embedding import OpenAIEmbedding
  5. from core.llm.llm_builder import LLMBuilder
  6. class IndexBuilder:
  7. @classmethod
  8. def get_default_service_context(cls, tenant_id: str) -> ServiceContext:
  9. # set number of output tokens
  10. num_output = 512
  11. # only for verbose
  12. callback_manager = CallbackManager([DifyStdOutCallbackHandler()])
  13. llm = LLMBuilder.to_llm(
  14. tenant_id=tenant_id,
  15. model_name='text-davinci-003',
  16. temperature=0,
  17. max_tokens=num_output,
  18. callback_manager=callback_manager,
  19. )
  20. llm_predictor = LLMPredictor(llm=llm)
  21. # These parameters here will affect the logic of segmenting the final synthesized response.
  22. # The number of refinement iterations in the synthesis process depends
  23. # on whether the length of the segmented output exceeds the max_input_size.
  24. prompt_helper = PromptHelper(
  25. max_input_size=3500,
  26. num_output=num_output,
  27. max_chunk_overlap=20
  28. )
  29. provider = LLMBuilder.get_default_provider(tenant_id)
  30. model_credentials = LLMBuilder.get_model_credentials(
  31. tenant_id=tenant_id,
  32. model_provider=provider,
  33. model_name='text-embedding-ada-002'
  34. )
  35. return ServiceContext.from_defaults(
  36. llm_predictor=llm_predictor,
  37. prompt_helper=prompt_helper,
  38. embed_model=OpenAIEmbedding(**model_credentials),
  39. )
  40. @classmethod
  41. def get_fake_llm_service_context(cls, tenant_id: str) -> ServiceContext:
  42. llm = LLMBuilder.to_llm(
  43. tenant_id=tenant_id,
  44. model_name='fake'
  45. )
  46. return ServiceContext.from_defaults(
  47. llm_predictor=LLMPredictor(llm=llm),
  48. embed_model=OpenAIEmbedding()
  49. )