import os

from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

from llm_model.get_vector_db import get_vector_db

LLM_MODEL = os.getenv('LLM_MODEL', 'qwen2:7b')


# Build the two prompt templates: one for generating alternative queries,
# one for answering the question based on the retrieved context
def get_prompt():
    QUERY_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is to generate three
different versions of the given user question to retrieve relevant documents
from a vector database. By generating multiple perspectives on the user
question, your goal is to help the user overcome some of the limitations of
distance-based similarity search. Provide these alternative questions
separated by newlines.
Original question: {question}""",
    )

    template = """Answer the question in Chinese based only on the following context:
{context}
Output strictly in Markdown and make sure the formatting is correct.
Question: {question}
"""

    prompt = ChatPromptTemplate.from_template(template)

    return QUERY_PROMPT, prompt


# Main function to handle the query process
def query(user_question):
    if user_question:
        # Initialize the language model (keep_alive=-1 keeps the model loaded
        # between calls; num_gpu=0 forces CPU-only inference)
        llm = ChatOllama(model=LLM_MODEL, keep_alive=-1, num_gpu=0)
        # Get the vector database instance
        db = get_vector_db()
        # Get the prompt templates
        QUERY_PROMPT, prompt = get_prompt()

        # Set up the retriever to generate multiple query variants using the
        # language model and the query prompt
        retriever = MultiQueryRetriever.from_llm(
            db.as_retriever(),
            llm,
            prompt=QUERY_PROMPT
        )

        # Define the processing chain: retrieve context, generate the answer,
        # and parse the output to a plain string
        chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        response = chain.invoke(user_question)

        return response

    return None
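
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal way to exercise query(). It assumes an Ollama server is running
# locally with the qwen2:7b model already pulled, and that the vector store
# returned by get_vector_db() has been populated with documents beforehand.
# The sample question is hypothetical.
if __name__ == '__main__':
    answer = query("What does the document say about the deployment process?")
    print(answer)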