vocal.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. from modelscope.pipelines import pipeline
  2. from modelscope.utils.constant import Tasks
  3. import time
  4. import torch
  5. # print(torch.__version__) # 查看torch当前版本号
  6. # print(torch.version.cuda) # 编译当前版本的torch使用的cuda版本号
  7. # print(torch.cuda.is_available()) # 查看当前cuda是否可用于当前版本的Torch,如果输出True,则表示可用
  def voice_text(input_video_path, model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'):
      """Transcribe a media file and return the recognized text as one string.

      A ModelScope speech-recognition pipeline is created for ``model`` (pinned
      to revision ``v2.0.4``, device ``'gpu'``), invoked on
      ``input_video_path``, and the ``'text'`` entry of every returned item is
      concatenated with single spaces.

      Args:
          input_video_path: Handed to the pipeline unchanged; the script entry
              point of this file passes a local ``.mp3`` path.
          model: Model identifier forwarded to ``pipeline``.

      Returns:
          The ``'text'`` values of all result items joined by ``' '``.
      """
      # NOTE(review): the pipeline is rebuilt on every call; confirm whether
      # callers invoke this often enough that reusing one instance is worthwhile.
      recognizer = pipeline(
          task=Tasks.auto_speech_recognition,
          model=model,
          model_revision="v2.0.4",
          device='gpu',
      )

      # The result is iterated as a sequence of mappings that each carry a
      # 'text' key -- presumably one per recognized utterance; verify against
      # the pipeline's output format.
      recognized_items = recognizer(input_video_path)
      return ' '.join(entry['text'] for entry in recognized_items)
  if __name__ == "__main__":
      # Manual smoke test: transcribe one local audio file with the default
      # model and report how long model loading plus inference took.
      #
      # A hosted sample can be used instead of the local file:
      # 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav'
      # Replace this with the path of the local audio file to transcribe.
      local_audio_path = 'data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'

      # perf_counter() is monotonic, so the measured interval cannot be skewed
      # by system clock adjustments the way time.time() differences can.
      start_time = time.perf_counter()

      # Reuse voice_text() rather than duplicating its pipeline setup; its
      # default model is the same checkpoint this script used directly.
      result = voice_text(local_audio_path)
      print(result)

      # Elapsed time covers pipeline construction and recognition.
      elapsed_time = time.perf_counter() - start_time
      print(f"耗时: {elapsed_time} 秒")