"""Speech-to-text helpers built on FunASR's ``AutoModel``.

Importing this module exposes :func:`vocal_text`; running it as a script
transcribes a sample audio file and prints the result and the elapsed time.
"""
from funasr import AutoModel
import time

# Model configuration shared by ``vocal_text`` and the demo script, kept in
# one place so the two call sites cannot drift apart.
_MODEL_CONFIG = {
    "model": "./Voice_translation",
    "model_revision": "v2.0.4",
    "vad_model": "./Endpoint_detection",
    "vad_model_revision": "v2.0.4",
    "punc_model": "./Ct_punc",
    "punc_model_revision": "v2.0.4",
}

# Sample input used only when the module is executed as a script.
_DEMO_AUDIO_PATH = "./data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3"


def _build_model(**overrides):
    """Return an ``AutoModel`` built from the shared config plus *overrides*."""
    return AutoModel(**_MODEL_CONFIG, **overrides)


def _extract_texts(res):
    """Return the ``'text'`` entry of every item in *res*, in order."""
    return [item['text'] for item in res]


def vocal_text(input_video_path):
    """Transcribe *input_video_path* and return the recognized text.

    Args:
        input_video_path: Input forwarded unchanged as the first positional
            argument of ``AutoModel.generate``.

    Returns:
        The ``'text'`` fields of all result items joined by single spaces
        (an empty string when the model returns no items).
    """
    # NOTE(review): ``use_cuda`` / ``use_fast`` are forwarded as-is; confirm
    # that the installed FunASR version actually honors these keyword names.
    # NOTE(review): the model is rebuilt on every call; callers transcribing
    # many files may want to construct it once and reuse it.
    model = _build_model(use_cuda=True, use_fast=True)
    res = model.generate(input_video_path, batch_size_s=30, hotword='test')
    return ' '.join(_extract_texts(res))


def _main():
    """Transcribe the demo audio file, print the results and the run time."""
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    model = _build_model()
    res = model.generate(input=_DEMO_AUDIO_PATH, batch_size_s=30, hotword='test')
    print(res)

    texts = _extract_texts(res)
    print(texts)

    result = ' '.join(texts)
    print(result)

    # Disabled helper for saving the transcript to a file:
    # def save(input,savepath):
    #     outputs = open(savepath, 'w', encoding='utf-8')
    #     outputs.write(input+'\n')
    #     outputs.close()
    # save(input=result,savepath=r"F:\work\voice_translation\datasets\1.txt")

    # Compute the elapsed time.
    elapsed_time = time.perf_counter() - start_time
    print(f"耗时: {elapsed_time} 秒")


if __name__ == "__main__":
    _main()