# wav2text.py
# --- Module setup: imports and one-time ASR model load ----------------------
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
import torch
import emoji

# Prefer the first CUDA GPU when available; otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Local path to the SenseVoiceSmall model weights (loaded from disk, no download).
asr_model_path = r"/work/models/SenseVoiceSmall/"
# Load the ASR model once at import time; shared by every Audio2Text instance.
# disable_update=True keeps funasr from checking for newer model versions.
asr_model = AutoModel(model=asr_model_path, trust_remote_code=False, device=device, disable_update=True)
  8. class Audio2Text():
  9. def __init__(self, audio_file_path):
  10. self.audio_path = audio_file_path
  11. def audo_to_text(self):
  12. res = asr_model.generate(
  13. input=self.audio_path,
  14. cache={},
  15. language="auto", # "zn", "en", "yue", "ja", "ko", "nospeech"
  16. use_itn=True,
  17. # batch_size_s=180,
  18. merge_vad=True, #
  19. # merge_length_s=15,
  20. ncpu=4
  21. )
  22. text = rich_transcription_postprocess(res[0]["text"])
  23. text = ''.join(char for char in text if char not in emoji.EMOJI_DATA)
  24. print(f"audio2text result: {text}")
  25. return text