import time

import torch  # kept: commented debug snippets below rely on it; may trigger CUDA init
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Debug helpers for checking the local CUDA/torch setup:
# print(torch.__version__)          # current torch version
# print(torch.version.cuda)         # CUDA version this torch build was compiled with
# print(torch.cuda.is_available())  # True if CUDA is usable with this torch build


def voice_text(input_video_path,
               model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'):
    """Transcribe an audio/video file to text via a ModelScope ASR pipeline.

    Args:
        input_video_path: Path or URL of the audio/video file to transcribe.
        model: ModelScope model id of the ASR model. Defaults to
            Paraformer-large (Chinese, 16 kHz, vocab 8404).

    Returns:
        str: Recognized text segments joined by single spaces.
    """
    # NOTE(review): the pipeline is rebuilt on every call — acceptable for a
    # one-shot script, but cache it if this is called repeatedly.
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model=model,
        model_revision="v2.0.4",
        device='gpu')
    res = inference_pipeline(input_video_path)
    # The pipeline returns a list of segment dicts; collect their 'text' fields.
    return ' '.join(item['text'] for item in res)


if __name__ == "__main__":
    start_time = time.time()
    # Reuse voice_text() rather than duplicating the pipeline setup here
    # (the previous copy used the exact same model id and revision).
    local_audio_path = 'data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'
    result = voice_text(local_audio_path)
    print(result)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"耗时: {elapsed_time} 秒")