
Remove dead code

gushoubang 6 months ago
parent
commit
237708cb64
9 files changed, 1 addition and 526 deletions
  1. 111.py (+0 -21)
  2. app.py (+0 -228)
  3. app.spec (+0 -38)
  4. app/services/chat_service.py (+1 -1)
  5. embed.py (+0 -48)
  6. get_vector_db.py (+0 -18)
  7. query.py (+0 -62)
  8. vocal.py (+0 -58)
  9. voice_translation_test.py (+0 -52)

File diff suppressed because it is too large
+ 0 - 21
111.py


File diff suppressed because it is too large
+ 0 - 228
app.py


+ 0 - 38
app.spec

@@ -1,38 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-
-
-a = Analysis(
-    ['app.py'],
-    pathex=[],
-    binaries=[],
-    datas=[],
-    hiddenimports=[],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    noarchive=False,
-    optimize=0,
-)
-pyz = PYZ(a.pure)
-
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.datas,
-    [],
-    name='app',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)
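
Note: the deleted spec above looks like the unmodified file that a stock `pyinstaller --onefile app.py` run emits, so it can be regenerated on demand and nothing is lost by removing it from the repo.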

+ 1 - 1
app/services/chat_service.py

@@ -83,7 +83,7 @@ def clear_chat_history():
 def create_chat(msg,type):
     # msg = data['msg']
     msg = replace_word(msg, target_word)
-    words_to_replace1 = ["爆破", "爆坡"]
+    words_to_replace1 = ["爆破", "爆坡", "鲍坡"]
     for word in words_to_replace1:
         msg = msg.replace(word, "抱坡")
     print(msg)
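
The edit above grows a hard-coded chain of homophone fixes for ASR output (爆破, 爆坡, and 鲍坡 all normalize to the place name 抱坡). If the list keeps growing, a lookup table keeps the intent in one place; this is a hypothetical sketch, not part of the commit:

    # Hypothetical refactor of the replacement chain above; names are illustrative.
    HOMOPHONES = {"爆破": "抱坡", "爆坡": "抱坡", "鲍坡": "抱坡"}

    def normalize_homophones(msg: str) -> str:
        # Replace each known ASR mishearing with the intended place name.
        for wrong, right in HOMOPHONES.items():
            msg = msg.replace(wrong, right)
        return msg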

+ 0 - 48
embed.py

@@ -1,48 +0,0 @@
-import os
-from datetime import datetime
-from werkzeug.utils import secure_filename
-from langchain_community.document_loaders import UnstructuredPDFLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from get_vector_db import get_vector_db
-
-TEMP_FOLDER = os.getenv('TEMP_FOLDER', './_temp')
-
-# Function to check if the uploaded file is allowed (only PDF files)
-def allowed_file(filename):
-    return '.' in filename and filename.rsplit('.', 1)[1].lower() in {'pdf'}
-
-# Function to save the uploaded file to the temporary folder
-def save_file(file):
-    # Save the uploaded file with a secure filename and return the file path
-    ct = datetime.now()
-    ts = ct.timestamp()
-    filename = str(ts) + "_" + secure_filename(file.filename)
-    file_path = os.path.join(TEMP_FOLDER, filename)
-    file.save(file_path)
-
-    return file_path
-
-# Function to load and split the data from the PDF file
-def load_and_split_data(file_path):
-    # Load the PDF file and split the data into chunks
-    loader = UnstructuredPDFLoader(file_path=file_path)
-    data = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
-    chunks = text_splitter.split_documents(data)
-
-    return chunks
-
-# Main function to handle the embedding process
-def embed(file):
-    # Check if the file is valid, save it, load and split the data, add to the database, and remove the temporary file
-    if file.filename != '' and file and allowed_file(file.filename):
-        file_path = save_file(file)
-        chunks = load_and_split_data(file_path)
-        db = get_vector_db()
-        db.add_documents(chunks)
-        db.persist()
-        os.remove(file_path)
-
-        return True
-
-    return False
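
For context on the deleted embed.py: embed() expects a Werkzeug-style upload object (it reads file.filename and calls file.save), so it was presumably fed from an upload route. A minimal sketch of such a caller; the route name and app wiring are assumptions, not recovered from this repo:

    # Hypothetical caller for the deleted embed.py; assumes a Flask app.
    from flask import Flask, request, jsonify
    from embed import embed

    app = Flask(__name__)

    @app.route('/embed', methods=['POST'])
    def route_embed():
        file = request.files.get('file')
        if file is not None and embed(file):
            return jsonify({"message": "File embedded successfully"}), 200
        return jsonify({"error": "File embedding failed"}), 400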

+ 0 - 18
get_vector_db.py

@@ -1,18 +0,0 @@
-import os
-from langchain_community.embeddings import OllamaEmbeddings
-from langchain_community.vectorstores.chroma import Chroma
-
-CHROMA_PATH = os.getenv('CHROMA_PATH', 'chroma')
-COLLECTION_NAME = os.getenv('COLLECTION_NAME', 'siwei_ai')
-TEXT_EMBEDDING_MODEL = os.getenv('TEXT_EMBEDDING_MODEL', 'nomic-embed-text')
-
-def get_vector_db():
-    embedding = OllamaEmbeddings(model=TEXT_EMBEDDING_MODEL,show_progress=True,num_gpu=0,num_thread=4)
-
-    db = Chroma(
-        collection_name=COLLECTION_NAME,
-        persist_directory=CHROMA_PATH,
-        embedding_function=embedding
-    )
-
-    return db
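
All three settings in the deleted get_vector_db.py are read from environment variables at module import time, so overrides must be set before the import. An illustrative override (paths and values are examples only):

    # Illustrative configuration for the deleted get_vector_db.py.
    import os
    os.environ['CHROMA_PATH'] = '/data/chroma'               # on-disk persistence directory
    os.environ['TEXT_EMBEDDING_MODEL'] = 'nomic-embed-text'  # Ollama embedding model

    from get_vector_db import get_vector_db
    db = get_vector_db()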

+ 0 - 62
query.py

@@ -1,62 +0,0 @@
-import os
-from langchain_community.chat_models import ChatOllama
-from langchain.prompts import ChatPromptTemplate, PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain.retrievers.multi_query import MultiQueryRetriever
-from get_vector_db import get_vector_db
-
-LLM_MODEL = os.getenv('LLM_MODEL', 'qwen2:7b')
-
-# Function to get the prompt templates for generating alternative questions and answering based on context
-def get_prompt():
-    QUERY_PROMPT = PromptTemplate(
-        input_variables=["question"],
-        template="""你是一名AI语言模型助理。你的任务是生成三个
-        从中检索相关文档的给定用户问题的不同版本
-        矢量数据库。通过对用户问题生成多个视角
-        目标是帮助用户克服基于距离的一些局限性
-        相似性搜索。请提供这些用换行符分隔的备选问题。
-        Original question: {question}""",
-    )
-
-    template = """仅根据以下上下文用中文回答问题:
-    {context},请严格以markdown格式输出并保障寄送格式正确无误,
-    Question: {question}
-    """
-    # Question: {question}
-
-
-    prompt = ChatPromptTemplate.from_template(template)
-    return QUERY_PROMPT, prompt
-
-# Main function to handle the query process
-def query(input):
-    if input:
-        # Initialize the language model with the specified model name
-        llm = ChatOllama(model=LLM_MODEL,keep_alive=-1,num_gpu=0)
-        # Get the vector database instance
-        db = get_vector_db()
-        # Get the prompt templates
-        QUERY_PROMPT, prompt = get_prompt()
-
-        # Set up the retriever to generate multiple queries using the language model and the query prompt
-        retriever = MultiQueryRetriever.from_llm(
-            db.as_retriever(), 
-            llm,
-            prompt=QUERY_PROMPT
-        )
-
-        # Define the processing chain to retrieve context, generate the answer, and parse the output
-        chain = (
-            {"context": retriever, "question": RunnablePassthrough()}
-            | prompt
-            | llm
-            | StrOutputParser()
-        )
-
-        response = chain.invoke(input)
-        
-        return response
-
-    return None
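
The deleted query() took a raw question string and returned the parsed model answer (or None for empty input). A minimal caller, assuming a running Ollama server and an already populated Chroma store:

    # Illustrative usage of the deleted query.py.
    from query import query

    answer = query("抱坡区的规划目标是什么?")  # example question; any string works
    print(answer)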

+ 0 - 58
vocal.py

@@ -1,58 +0,0 @@
-# from modelscope.pipelines import pipeline
-# from modelscope.utils.constant import Tasks
-# import time
-# import torch
-
-# # print(torch.__version__) # check the current torch version
-
-# # print(torch.version.cuda) # CUDA version this torch build was compiled with
-
-# # print(torch.cuda.is_available()) # whether CUDA is usable with this torch build; True means yes
-
-
-
-# def voice_text(input_video_path,model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'):
-#     inference_pipeline = pipeline(
-#     task=Tasks.auto_speech_recognition,
-#     # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
-#     model=model,
-#     # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
-#     model_revision="v2.0.4",
-#     device='gpu')
-
-#     res = inference_pipeline(input_video_path)
-#     # print(res)
-#     texts = [item['text'] for item in res]
-
-#     # print(texts)
-#     result = ' '.join(texts)
-#     return result
-
-# if  __name__ == "__main__":
-#     start_time = time.time()
-#     inference_pipeline = pipeline(
-#         task=Tasks.auto_speech_recognition,
-#         # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
-#         model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
-#         # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
-#         model_revision="v2.0.4",
-#         device='gpu')
-
-#     # rec_result = inference_pipeline('https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav')
-
-#     # replace with a local audio file path
-#     local_audio_path = 'data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'
-#     res = inference_pipeline(local_audio_path)
-#     # print(res)
-#     texts = [item['text'] for item in res]
-
-#     # print(texts)
-#     result = ' '.join(texts)
-#     print(result)
-
-
-#     end_time = time.time()
-#     # compute the elapsed time
-#     elapsed_time = end_time - start_time
-
-#     print(f"耗时: {elapsed_time} 秒")

+ 0 - 52
voice_translation_test.py

@@ -1,52 +0,0 @@
-# from funasr import AutoModel
-# import time
-
-# def vocal_text(input_video_path):
-#     model = AutoModel(model="./Voice_translation", model_revision="v2.0.4",
-#                     vad_model="./Endpoint_detection", vad_model_revision="v2.0.4",
-#                     punc_model="./Ct_punc", punc_model_revision="v2.0.4",
-#                     use_cuda=True,use_fast = True,
-#                     )
-#     res = model.generate(input_video_path,
-#                 batch_size_s=30,
-#                 hotword='test')
-
-
-#     texts = [item['text'] for item in res]
-
-
-#     result = ' '.join(texts)
-#     return result
-
-
-# if  __name__ == "__main__":
-#     start_time = time.time()
-
-
-#     model = AutoModel(model="./Voice_translation", model_revision="v2.0.4",
-#                     vad_model="./Endpoint_detection", vad_model_revision="v2.0.4",
-#                     punc_model="./Ct_punc", punc_model_revision="v2.0.4",
-#                     )
-#     res = model.generate(input="./data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3",
-#                 batch_size_s=30,
-#                 hotword='test')
-
-#     print(res)
-#     texts = [item['text'] for item in res]
-
-#     print(texts)
-#     result = ' '.join(texts)
-#     print(result)
-
-
-# # def save(input,savepath):
-# #     outputs = open(savepath, 'w', encoding='utf-8')
-# #     outputs.write(input+'\n')
-# #     outputs.close()
-# # save(input=result,savepath=r"F:\work\voice_translation\datasets\1.txt")
-
-#     end_time = time.time()
-#     # compute the elapsed time
-#     elapsed_time = end_time - start_time
-
-#     print(f"耗时: {elapsed_time} 秒")

Some files were not shown because too many files changed in this diff