12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- from modelscope.pipelines import pipeline
- from modelscope.utils.constant import Tasks
- import time
- import torch
def voice_text(input_video_path,
               model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
               device='gpu'):
    """Transcribe an audio/video file to text with a ModelScope ASR pipeline.

    Parameters
    ----------
    input_video_path : str
        Path to the local audio or video file to transcribe.
    model : str
        ModelScope model id for the ASR pipeline. Defaults to the
        Paraformer large Mandarin model, as in the original code.
    device : str
        Inference device passed to the pipeline. Defaults to 'gpu' to
        preserve the original (previously hard-coded) behavior; pass
        'cpu' on machines without CUDA.

    Returns
    -------
    str
        All recognized text segments joined with single spaces.
    """
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model=model,
        model_revision="v2.0.4",
        # Was hard-coded to 'gpu'; now configurable so CPU-only hosts work.
        device=device,
    )
    res = inference_pipeline(input_video_path)
    # NOTE(review): assumes the pipeline returns a list of dicts each
    # carrying a 'text' key — confirm against the installed modelscope version.
    texts = [item['text'] for item in res]
    return ' '.join(texts)
if __name__ == "__main__":
    # Smoke-test: transcribe a local sample file and report elapsed time.
    start_time = time.time()

    local_audio_path = 'data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'
    # Reuse voice_text() instead of duplicating the pipeline construction
    # inline — same model id, revision, and device as before.
    result = voice_text(local_audio_path)
    print(result)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"耗时: {elapsed_time} 秒")
|