"""Audio transcription service: validates uploaded audio and runs speech-to-text via Whisper."""
  1. import io
  2. from werkzeug.datastructures import FileStorage
  3. from core.llm.llm_builder import LLMBuilder
  4. from core.llm.provider.llm_provider_service import LLMProviderService
  5. from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError, UnsupportedAudioTypeServiceError, ProviderNotSupportSpeechToTextServiceError
  6. from core.llm.whisper import Whisper
  7. from models.provider import ProviderName
# Maximum accepted audio upload size: 1 MB.
FILE_SIZE_LIMIT = 1 * 1024 * 1024
# Audio container extensions accepted for transcription; the mimetype check
# below matches these as 'audio/<ext>'.
ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
  10. class AudioService:
  11. @classmethod
  12. def transcript(cls, tenant_id: str, file: FileStorage):
  13. if file is None:
  14. raise NoAudioUploadedServiceError()
  15. extension = file.mimetype
  16. if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
  17. raise UnsupportedAudioTypeServiceError()
  18. file_content = file.read()
  19. file_size = len(file_content)
  20. if file_size > FILE_SIZE_LIMIT:
  21. message = f"({file_size} > {FILE_SIZE_LIMIT})"
  22. raise AudioTooLargeServiceError(message)
  23. provider_name = LLMBuilder.get_default_provider(tenant_id)
  24. if provider_name != ProviderName.OPENAI.value:
  25. raise ProviderNotSupportSpeechToTextServiceError('haha')
  26. provider_service = LLMProviderService(tenant_id, provider_name)
  27. buffer = io.BytesIO(file_content)
  28. buffer.name = 'temp.wav'
  29. return Whisper(provider_service.provider).transcribe(buffer)