gushoubang 9 months ago
parent
commit
fdd6ed5b90
3 changed files with 147 additions and 0 deletions
  1. BIN
      data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3
  2. 109 0
      requirements.txt
  3. 38 0
      vocal.py

BIN
data/audio/5bf77846-0193-4f35-92f7-09ce51ee3793.mp3


+ 109 - 0
requirements.txt

@@ -0,0 +1,109 @@
+addict==2.4.0
+aiohttp==3.9.5
+aiosignal==1.3.1
+aliyun-python-sdk-core==2.15.1
+aliyun-python-sdk-kms==2.16.3
+antlr4-python3-runtime==4.9.3
+async-timeout==4.0.3
+attrs==23.2.0
+audioread==3.0.1
+beartype==0.18.5
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+crcmod==1.7
+cryptography==42.0.7
+datasets==2.18.0
+decorator==5.1.1
+dill==0.3.8
+editdistance==0.8.1
+einops==0.8.0
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.2.0
+funasr==1.0.27
+gast==0.5.4
+huggingface-hub==0.23.2
+hydra-core==1.3.2
+idna==3.7
+importlib_metadata==7.1.0
+jaconv==0.3.4
+jamo==0.4.1
+jieba==0.42.1
+Jinja2==3.1.4
+jmespath==0.10.0
+joblib==1.4.2
+kaldiio==2.18.0
+lazy_loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.42.0
+MarkupSafe==2.1.5
+modelscope==1.14.0
+more-itertools==10.2.0
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multiprocess==0.70.16
+networkx==3.3
+numba==0.59.1
+numpy==1.26.4
+# nvidia-cublas-cu12==12.1.3.1
+# nvidia-cuda-cupti-cu12==12.1.105
+# nvidia-cuda-nvrtc-cu12==12.1.105
+# nvidia-cuda-runtime-cu12==12.1.105
+# nvidia-cudnn-cu12==8.9.2.26
+# nvidia-cufft-cu12==11.0.2.54
+# nvidia-curand-cu12==10.3.2.106
+# nvidia-cusolver-cu12==11.4.5.107
+# nvidia-cusparse-cu12==12.1.0.106
+# nvidia-nccl-cu12==2.20.5
+# nvidia-nvjitlink-cu12==12.5.40
+# nvidia-nvtx-cu12==12.1.105
+omegaconf==2.3.0
+openai-whisper
+oss2==2.18.5
+packaging==24.0
+pandas==2.2.2
+pillow==10.3.0
+platformdirs==4.2.2
+pooch==1.8.1
+protobuf==4.21.6
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+pycparser==2.22
+pycryptodome==3.20.0
+pynndescent==0.5.12
+python-dateutil==2.9.0.post0
+pytorch-wpe==0.0.1
+pytz==2024.1
+PyYAML==6.0.1
+regex==2024.5.15
+requests==2.32.2
+rotary-embedding-torch
+scikit-learn==1.5.0
+scipy==1.13.1
+sentencepiece==0.2.0
+simplejson==3.19.2
+six==1.16.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soxr==0.3.7
+sympy==1.12
+tensorboardX==2.6.2.2
+threadpoolctl==3.5.0
+tiktoken==0.7.0
+tomli==2.0.1
+# torch==1.13.1
+torch-complex
+# torchaudio==0.13.1
+# torchvision==0.14.1
+tqdm
+# triton
+typing_extensions==4.12.0
+tzdata==2024.1
+umap-learn==0.5.6
+urllib3==2.2.1
+xxhash==3.4.1
+yapf==0.40.2
+yarl==1.9.4
+zipp==3.19.0

+ 38 - 0
vocal.py

@@ -0,0 +1,38 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import time
+import torch
+
+print(torch.__version__) # 查看torch当前版本号
+
+print(torch.version.cuda) # 编译当前版本的torch使用的cuda版本号
+
+print(torch.cuda.is_available()) # 查看当前cuda是否可用于当前版本的Torch,如果输出True,则表示可用
+start_time = time.time()
+
+inference_pipeline = pipeline(
+    task=Tasks.auto_speech_recognition,
+    # model='iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
+    model='iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch', 
+    # model="model\punc_ct-transformer_cn-en-common-vocab471067-large",
+    model_revision="v2.0.4",
+    device='gpu')
+
+# rec_result = inference_pipeline('https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav')
+
+# 替换为本地语音文件路径
+local_audio_path = 'data\\audio\\5bf77846-0193-4f35-92f7-09ce51ee3793.mp3'
+res = inference_pipeline(local_audio_path)
+# print(res)
+texts = [item['text'] for item in res]
+
+# print(texts)
+result = ' '.join(texts)
+print(result)
+
+
+end_time = time.time()
+# 计算时间差
+elapsed_time = end_time - start_time
+
+print(f"耗时: {elapsed_time} 秒")