from typing import Optional

from langchain.callbacks import CallbackManager
from llama_index.langchain_helpers.agents import IndexToolConfig

from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
from core.index.keyword_table_index import KeywordTableIndex
from core.index.vector_index import VectorIndex
from core.prompt.prompts import QUERY_KEYWORD_EXTRACT_TEMPLATE
from core.tool.llama_index_tool import EnhanceLlamaIndexTool
from models.dataset import Dataset


class DatasetToolBuilder:
    """Builds a query tool backed by a dataset's index."""

    @classmethod
    def build_dataset_tool(cls, dataset: Dataset,
                           response_mode: str = "no_synthesizer",
                           callback_handler: Optional[DatasetToolCallbackHandler] = None
                           ) -> Optional[EnhanceLlamaIndexTool]:
        """Create an ``EnhanceLlamaIndexTool`` that queries ``dataset``.

        Datasets whose ``indexing_technique`` is ``"economy"`` are queried
        through the keyword table index; every other dataset is queried
        through the vector index.

        Args:
            dataset: The dataset to expose as a tool.
            response_mode: Passed through to the index query as
                ``response_mode``.
            callback_handler: Optional handler registered on the tool's
                callback manager, ahead of the stdout handler. When omitted,
                only the stdout handler is registered.

        Returns:
            The configured tool, or ``None`` when the dataset has no usable
            query index.
        """
        if dataset.indexing_technique == "economy":
            # use keyword table query
            index = KeywordTableIndex(dataset=dataset).query_index
            if not index:
                return None

            query_kwargs = {
                "mode": "default",
                "response_mode": response_mode,
                "query_keyword_extract_template": QUERY_KEYWORD_EXTRACT_TEMPLATE,
                "max_keywords_per_query": 5,
                # If num_chunks_per_query is too large,
                # it will slow down the synthesis process due to multiple iterations of refinement.
                "num_chunks_per_query": 2,
            }
        else:
            index = VectorIndex(dataset=dataset).query_index
            if not index:
                return None

            query_kwargs = {
                "mode": "default",
                "response_mode": response_mode,
                # If top_k is too large,
                # it will slow down the synthesis process due to multiple iterations of refinement.
                "similarity_top_k": 2,
            }

        # fulfill description when it is empty
        description = dataset.description
        if not description:
            description = 'useful for when you want to answer queries about the ' + dataset.name

        # callback_handler is optional; never hand a None entry to the
        # callback manager, which would be treated as a real handler.
        handlers = [DifyStdOutCallbackHandler()]
        if callback_handler is not None:
            handlers.insert(0, callback_handler)

        index_tool_config = IndexToolConfig(
            index=index,
            name=f"dataset-{dataset.id}",
            description=description,
            index_query_kwargs=query_kwargs,
            tool_kwargs={
                "callback_manager": CallbackManager(handlers)
            },
            # tool_kwargs={"return_direct": True},
            # return_direct: Whether to return LLM results directly or process the output data with an Output Parser
        )

        index_callback_handler = DatasetIndexToolCallbackHandler(dataset_id=dataset.id)

        return EnhanceLlamaIndexTool.from_tool_config(
            tool_config=index_tool_config,
            callback_handler=index_callback_handler
        )