Преглед изворног кода

merge code and get analyser data

lloydzhou пре годину дана
родитељ
комит
50e63109a3
2 измењена фајла са 32 додавања и 2 уклањања
  1. 17 2
      app/components/realtime-chat/realtime-chat.tsx
  2. 15 0
      app/lib/audio.ts

+ 17 - 2
app/components/realtime-chat/realtime-chat.tsx

@@ -71,7 +71,7 @@ export function RealtimeChat({
         const turnDetection: TurnDetection = useVAD
           ? { type: "server_vad" }
           : null;
-        clientRef.current.configure({
+        await clientRef.current.configure({
           instructions: "",
           voice,
           input_audio_transcription: { model: "whisper-1" },
@@ -100,6 +100,7 @@ export function RealtimeChat({
               });
             }
           }
+          await clientRef.current.generateResponse();
         } catch (error) {
           console.error("Set message failed:", error);
         }
@@ -267,11 +268,25 @@ export function RealtimeChat({
       console.error(error);
     });
 
+    // TODO demo to get frequency. will pass audioHandlerRef.current to child component draw.
+    // TODO try using requestAnimationFrame
+    const interval = setInterval(() => {
+      if (audioHandlerRef.current) {
+        const data = audioHandlerRef.current.getByteFrequencyData();
+        console.log("getByteFrequencyData", data);
+      }
+    }, 100);
+
     return () => {
       if (isRecording) {
         toggleRecording();
       }
-      audioHandlerRef.current?.close().catch(console.error);
+      audioHandlerRef.current
+        ?.close()
+        .catch(console.error)
+        .finally(() => {
+          clearInterval(interval);
+        });
       disconnect();
     };
   }, []);

+ 15 - 0
app/lib/audio.ts

@@ -1,5 +1,8 @@
 export class AudioHandler {
   private context: AudioContext;
+  private mergeNode: ChannelMergerNode;
+  private analyserData: Uint8Array;
+  public analyser: AnalyserNode;
   private workletNode: AudioWorkletNode | null = null;
   private stream: MediaStream | null = null;
   private source: MediaStreamAudioSourceNode | null = null;
@@ -13,6 +16,16 @@ export class AudioHandler {
 
   constructor() {
     this.context = new AudioContext({ sampleRate: this.sampleRate });
+    // using ChannelMergerNode to get merged audio data, and then get analyser data.
+    this.mergeNode = new ChannelMergerNode(this.context, { numberOfInputs: 2 });
+    this.analyser = new AnalyserNode(this.context, { fftSize: 256 });
+    this.analyserData = new Uint8Array(this.analyser.frequencyBinCount);
+    this.mergeNode.connect(this.analyser);
+  }
+
+  getByteFrequencyData() {
+    this.analyser.getByteFrequencyData(this.analyserData);
+    return this.analyserData;
   }
 
   async initialize() {
@@ -60,6 +73,7 @@ export class AudioHandler {
       };
 
       this.source.connect(this.workletNode);
+      this.source.connect(this.mergeNode, 0, 0);
       this.workletNode.connect(this.context.destination);
 
       this.workletNode.port.postMessage({ command: "START_RECORDING" });
@@ -114,6 +128,7 @@ export class AudioHandler {
     const source = this.context.createBufferSource();
     source.buffer = audioBuffer;
     source.connect(this.context.destination);
+    source.connect(this.mergeNode, 0, 1);
 
     const chunkDuration = audioBuffer.length / this.sampleRate;