gushoubang 9 bulan lalu
induk
melakukan
eceeb963e9
3 mengubah file dengan 45 tambahan dan 25 penghapusan
  1. TEMPAT SAMPAH
      __pycache__/vocal.cpython-310.pyc
  2. TEMPAT SAMPAH
      data/audio/1364627f-5a9b-42d7-b7f6-b99c094606cd.mp3
  3. 45 25
      vocal.py

TEMPAT SAMPAH
__pycache__/vocal.cpython-310.pyc


TEMPAT SAMPAH
data/audio/1364627f-5a9b-42d7-b7f6-b99c094606cd.mp3


+ 45 - 25
vocal.py

@@ -3,36 +3,56 @@ from modelscope.utils.constant import Tasks
 import time
 import torch
 
-print(torch.__version__) # 查看torch当前版本号
+# print(torch.__version__) # 查看torch当前版本号
 
-print(torch.version.cuda) # 编译当前版本的torch使用的cuda版本号
+# print(torch.version.cuda) # 编译当前版本的torch使用的cuda版本号
 
-print(torch.cuda.is_available()) # 查看当前cuda是否可用于当前版本的Torch,如果输出True,则表示可用
-start_time = time.time()
+# print(torch.cuda.is_available()) # 查看当前cuda是否可用于当前版本的Torch,如果输出True,则表示可用
 
-inference_pipeline = pipeline(
+
+
def voice_text(input_video_path,
               model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
               device='gpu'):
    """Transcribe an audio/video file to text with a ModelScope Paraformer ASR pipeline.

    Args:
        input_video_path: Path (or URL) of the audio/video file to transcribe.
        model: ModelScope model id to load. Defaults to the Chinese 16 kHz
            Paraformer-large model, matching the previous hard-coded value.
        device: Device string forwarded to the pipeline. Defaults to 'gpu'
            (previous behavior); pass 'cpu' when CUDA is unavailable.

    Returns:
        str: All recognized text segments joined with single spaces.
    """
    # NOTE(review): the pipeline (and therefore the model weights) is rebuilt
    # on every call. If this function is invoked repeatedly, cache the
    # pipeline at module level or accept a pre-built pipeline instead.
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        model=model,
        # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
        model_revision="v2.0.4",
        device=device)

    res = inference_pipeline(input_video_path)
    # Each result item carries a 'text' field; join the segments into one string.
    texts = [item['text'] for item in res]
    return ' '.join(texts)
+
if __name__ == "__main__":
    # Time a single transcription of a local audio file.
    start_time = time.time()

    # rec_result = inference_pipeline('https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav')

    # Replace with the local audio file path to transcribe.
    local_audio_path = 'data/audio/1364627f-5a9b-42d7-b7f6-b99c094606cd.mp3'

    # Reuse voice_text() instead of duplicating the pipeline construction and
    # result post-processing inline — the two copies had already started to
    # drift apart and any model/device change had to be made twice.
    result = voice_text(local_audio_path)
    print(result)

    end_time = time.time()
    # Elapsed wall-clock time for model load + inference.
    elapsed_time = end_time - start_time

    print(f"耗时: {elapsed_time} 秒")