import os

import emoji
import torch
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
# Device selection: use the first CUDA GPU when available, otherwise CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Model location can be overridden via the ASR_MODEL_PATH environment
# variable; defaults to the original hard-coded path for compatibility.
asr_model_path = os.getenv("ASR_MODEL_PATH", r"/work/models/SenseVoiceSmall/")

# Loaded once at import time and shared by all Audio2Text instances.
# NOTE(review): this is an import-time side effect (disk I/O + GPU memory);
# confirm that is acceptable for every importer of this module.
asr_model = AutoModel(
    model=asr_model_path,
    trust_remote_code=False,
    device=device,
    disable_update=True,
)
class Audio2Text:
    """Transcribe an audio file to text with the module-level SenseVoice ASR model."""

    def __init__(self, audio_file_path):
        # Path (or other input accepted by AutoModel.generate) of the audio
        # to transcribe; consumed by audo_to_text().
        self.audio_path = audio_file_path

    def audo_to_text(self):
        """Run ASR on ``self.audio_path`` and return the cleaned transcript.

        Returns:
            str: the rich-postprocessed transcription with all emoji removed.

        Raises:
            IndexError: if the model returns an empty result list.
        """
        res = asr_model.generate(
            input=self.audio_path,
            cache={},
            language="auto",  # alternatives: "zn", "en", "yue", "ja", "ko", "nospeech"
            use_itn=True,     # inverse text normalization (numbers, punctuation)
            merge_vad=True,   # merge short VAD segments before recognition
            ncpu=4,
        )
        text = rich_transcription_postprocess(res[0]["text"])
        # Fix: the original filtered characters one-by-one against
        # emoji.EMOJI_DATA, which misses multi-codepoint emoji (ZWJ
        # sequences, skin-tone modifiers, flags). replace_emoji removes
        # complete emoji sequences.
        text = emoji.replace_emoji(text, replace="")
        print(f"audio2text result: {text}")
        return text

    # Backward-compatible, correctly spelled alias for the typo'd
    # original method name.
    audio_to_text = audo_to_text