speech.ts 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. import { ChatGPTApi } from "../client/platforms/openai";
  2. import { getSTTLang } from "../locales";
  3. import { isFirefox } from "../utils";
  /** Handler that receives one piece of transcribed text. */
  export type TranscriptionCallback = (transcription: string) => void;

  /**
   * Shared contract for speech-to-text back ends.
   *
   * Subclasses implement the listening life cycle and call the protected
   * `onTranscription` handler whenever they have text to deliver.
   */
  export abstract class SpeechApi {
    /** Currently registered handler; does nothing until one is registered. */
    protected onTranscription: TranscriptionCallback = () => undefined;

    /** Reports whether the implementation considers itself to be listening. */
    abstract isListening(): boolean;

    /** Begins capturing speech. */
    abstract start(): Promise<void>;

    /** Ends capturing speech. */
    abstract stop(): Promise<void>;

    /**
     * Registers the handler that will receive transcriptions, replacing any
     * handler registered earlier.
     *
     * @param callback - Function invoked with each transcription.
     */
    onTranscriptionReceived(callback: TranscriptionCallback): void {
      this.onTranscription = callback;
    }
  }
  /**
   * Speech-to-text back end that records microphone audio with `MediaRecorder`
   * and, when stopped, passes the recording to `ChatGPTApi.transcription`.
   *
   * The value resolved by `transcription` is forwarded unchanged to the
   * registered transcription callback (presumably the recognised text —
   * confirm against `ChatGPTApi`).
   */
  export class OpenAITranscriptionApi extends SpeechApi {
    private listeningStatus = false;
    private mediaRecorder: MediaRecorder | null = null;
    private stream: MediaStream | null = null;
    private audioChunks: Blob[] = [];

    /** True from a successful `start()` until `stop()` has finished its work. */
    isListening = () => this.listeningStatus;

    /**
     * @param transcriptionCallback - Optional handler registered immediately
     *   for incoming transcriptions.
     */
    constructor(transcriptionCallback?: TranscriptionCallback) {
      super();
      if (transcriptionCallback) {
        this.onTranscriptionReceived(transcriptionCallback);
      }
    }

    /**
     * Requests microphone access and starts recording.
     *
     * Does nothing when a recording is already in progress. Logs a warning and
     * returns without recording when `navigator.mediaDevices` is unavailable.
     *
     * @throws Whatever `getUserMedia` rejects with, or whatever creating or
     *   starting the `MediaRecorder` throws; in the latter case the freshly
     *   acquired stream is released before rethrowing.
     */
    async start(): Promise<void> {
      // A second start() would orphan the active recorder and its stream.
      if (this.listeningStatus) {
        return;
      }
      if (!navigator.mediaDevices) {
        console.warn("Media Decives will work only with SSL");
        return;
      }

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      try {
        const recorder = new MediaRecorder(stream);
        recorder.ondataavailable = (e) => {
          if (e.data && e.data.size > 0) {
            this.audioChunks.push(e.data);
          }
        };
        this.audioChunks = [];
        // Timeslice of 1000 ms: deliver a chunk roughly once per second.
        recorder.start(1000);
        this.mediaRecorder = recorder;
        this.stream = stream;
        this.listeningStatus = true;
      } catch (error) {
        // Do not keep the microphone open if recording could not begin.
        stream.getTracks().forEach((track) => track.stop());
        throw error;
      }
    }

    /**
     * Stops recording, releases the microphone, and transcribes the captured
     * audio. Does nothing when no recording is in progress.
     *
     * @returns A promise that settles once the transcription callback has run.
     * @throws Rejects with the underlying error when transcription fails; the
     *   listening state is reset in that case as well.
     */
    async stop(): Promise<void> {
      const recorder = this.mediaRecorder;
      if (!recorder || !this.listeningStatus) {
        return;
      }
      // The recorder may already have stopped on its own; no "stop" event
      // would fire again, so finish directly instead of waiting forever.
      if (recorder.state === "inactive") {
        return this.finishRecording();
      }
      return new Promise<void>((resolve, reject) => {
        recorder.addEventListener(
          "stop",
          () => {
            this.finishRecording().then(resolve, reject);
          },
          { once: true },
        );
        recorder.stop();
      });
    }

    /** Stops every track of the held stream so the microphone is released. */
    private releaseStream(): void {
      this.stream?.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }

    /**
     * Releases capture resources, sends the recorded audio for transcription,
     * and forwards the result to the registered callback. State is reset even
     * when transcription fails.
     */
    private async finishRecording(): Promise<void> {
      this.releaseStream();
      this.mediaRecorder = null;
      try {
        // NOTE(review): the Blob is labelled "audio/wav" regardless of the
        // recorder's actual mimeType; kept as-is because the consumer may
        // depend on it — confirm against ChatGPTApi.transcription.
        const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" });
        const llm = new ChatGPTApi();
        const transcription = await llm.transcription({ file: audioBlob });
        this.onTranscription(transcription);
      } finally {
        this.audioChunks = [];
        this.listeningStatus = false;
      }
    }
  }
  /**
   * Speech-to-text back end built on the browser's `SpeechRecognition`
   * (or `webkitSpeechRecognition`) constructor.
   *
   * No recognizer is created when `isFirefox()` is true or when neither
   * constructor exists on `window`; `start()` and `stop()` then only log a
   * warning.
   */
  export class WebTranscriptionApi extends SpeechApi {
    private listeningStatus = false;
    // Typed loosely because the constructor is looked up dynamically on `window`.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    private recognitionInstance: any | null = null;

    /** True between a successful `start()` and the next `stop()`. */
    isListening = () => this.listeningStatus;

    /**
     * @param transcriptionCallback - Optional handler registered immediately
     *   for final recognition results.
     */
    constructor(transcriptionCallback?: TranscriptionCallback) {
      super();
      // Register first so the callback is kept even on unsupported browsers.
      if (transcriptionCallback) {
        this.onTranscriptionReceived(transcriptionCallback);
      }
      if (isFirefox()) return;

      const SpeechRecognition =
        (window as any).SpeechRecognition ||
        (window as any).webkitSpeechRecognition;
      // Without a constructor `new` would throw; leave the instance unset.
      if (!SpeechRecognition) return;

      const recognition = new SpeechRecognition();
      recognition.continuous = true;
      recognition.interimResults = true;
      recognition.lang = getSTTLang();
      recognition.onresult = (event: any) => {
        // Only the most recent result is inspected, and only once it is final.
        const result = event.results[event.results.length - 1];
        if (result.isFinal) {
          this.onTranscription(result[0].transcript);
        }
      };
      this.recognitionInstance = recognition;
    }

    /**
     * Starts recognition. Logs a warning and returns when no recognizer is
     * available.
     *
     * @throws Whatever the recognizer's `start()` throws; the listening flag
     *   is restored to its previous value before rethrowing.
     */
    async start(): Promise<void> {
      if (!this.recognitionInstance) {
        console.warn("Speech recognition is not available in this browser");
        return;
      }
      const wasListening = this.listeningStatus;
      this.listeningStatus = true;
      try {
        await this.recognitionInstance.start();
      } catch (error) {
        this.listeningStatus = wasListening;
        throw error;
      }
    }

    /**
     * Stops recognition. Logs a warning and returns when no recognizer is
     * available.
     */
    async stop(): Promise<void> {
      if (!this.recognitionInstance) {
        console.warn("Speech recognition is not available in this browser");
        return;
      }
      this.listeningStatus = false;
      await this.recognitionInstance.stop();
    }
  }
  106. }