from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import time
import torch

# print(torch.__version__)          # check the installed torch version
# print(torch.version.cuda)         # CUDA version this torch build was compiled against
# print(torch.cuda.is_available())  # True means CUDA is usable with this torch build


def voice_text(input_video_path, model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'):
    # Build the ModelScope ASR pipeline (Paraformer, Chinese 16 kHz model).
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        model=model,
        # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
        model_revision="v2.0.4",
        device='gpu')

    res = inference_pipeline(input_video_path)
    # print(res)
    # The pipeline returns a list of dicts; collect the recognized text segments.
    texts = [item['text'] for item in res]
    # print(texts)
    result = ' '.join(texts)
    return result


if __name__ == "__main__":
    start_time = time.time()
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
        model_revision="v2.0.4",
        device='gpu')
    # rec_result = inference_pipeline('https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav')
    # Replace with the path to a local audio file.
    local_audio_path = 'data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'
    res = inference_pipeline(local_audio_path)
    # print(res)
    texts = [item['text'] for item in res]
    # print(texts)
    result = ' '.join(texts)
    print(result)
    end_time = time.time()
    # Compute the elapsed time.
    elapsed_time = end_time - start_time
    print(f"Elapsed time: {elapsed_time} seconds")
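The voice_text helper above is defined but not reused in the __main__ block, so here is a minimal usage sketch for calling it from another script. The module name asr_demo and the audio path are placeholder assumptions, not part of the original code:

# Minimal usage sketch; 'asr_demo' (the file name this script is saved under)
# and the audio path below are hypothetical placeholders.
from asr_demo import voice_text

transcript = voice_text('data/audio/sample.mp3')  # uses the default Paraformer model
print(transcript)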