- from langchain.callbacks import CallbackManager
- from llama_index import ServiceContext, PromptHelper, LLMPredictor
-
- from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
- from core.embedding.openai_embedding import OpenAIEmbedding
- from core.llm.llm_builder import LLMBuilder
-
-
- class IndexBuilder:
-     @classmethod
-     def get_default_service_context(cls, tenant_id: str) -> ServiceContext:
-         # set number of output tokens
-         num_output = 512
-
-         # callback manager is only used for verbose output
-         callback_manager = CallbackManager([DifyStdOutCallbackHandler()])
-
-         llm = LLMBuilder.to_llm(
-             tenant_id=tenant_id,
-             model_name='text-davinci-003',
-             temperature=0,
-             max_tokens=num_output,
-             callback_manager=callback_manager,
-         )
-
-         llm_predictor = LLMPredictor(llm=llm)
-
-         # These parameters affect how the final synthesized response is chunked.
-         # The number of refine iterations during response synthesis depends on
-         # whether the chunked output exceeds max_input_size.
-         prompt_helper = PromptHelper(
-             max_input_size=3500,
-             num_output=num_output,
-             max_chunk_overlap=20
-         )
-
-         provider = LLMBuilder.get_default_provider(tenant_id)
-
-         model_credentials = LLMBuilder.get_model_credentials(
-             tenant_id=tenant_id,
-             model_provider=provider,
-             model_name='text-embedding-ada-002'
-         )
-
-         return ServiceContext.from_defaults(
-             llm_predictor=llm_predictor,
-             prompt_helper=prompt_helper,
-             embed_model=OpenAIEmbedding(**model_credentials),
-         )
-
-     @classmethod
-     def get_fake_llm_service_context(cls, tenant_id: str) -> ServiceContext:
-         llm = LLMBuilder.to_llm(
-             tenant_id=tenant_id,
-             model_name='fake'
-         )
-
-         return ServiceContext.from_defaults(
-             llm_predictor=LLMPredictor(llm=llm),
-             embed_model=OpenAIEmbedding()
-         )
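For context, a ServiceContext produced by the removed get_default_service_context would typically be handed to a llama_index index at build time. The following is only a minimal sketch, assuming the pre-0.7 llama_index API (GPTVectorStoreIndex, SimpleDirectoryReader, as_query_engine); the IndexBuilder import path, the './docs' directory, and the tenant id are placeholders, not taken from this diff.

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

from core.index.index_builder import IndexBuilder  # hypothetical module path

# Build the per-tenant service context defined in the removed class
# (assumes a valid tenant_id).
service_context = IndexBuilder.get_default_service_context(tenant_id='tenant-123')

# Load documents and build a vector index that uses this service context,
# so the LLM, embeddings and PromptHelper settings above apply to it.
documents = SimpleDirectoryReader('./docs').load_data()
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

# Queries are then synthesized with the refine logic governed by PromptHelper.
response = index.as_query_engine().query('What does this document cover?')
print(response)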