Przeglądaj źródła

feat: voice print

Dogtiti 1 rok temu
rodzic
commit
8b4ca133fd

+ 0 - 6
app/components/realtime-chat/realtime-chat.module.scss

@@ -31,12 +31,6 @@
     box-sizing: border-box;
     padding: 0 20px;
   }
-  .icon-center {
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    gap: 4px;
-  }
 
   .icon-left,
   .icon-right {

+ 29 - 16
app/components/realtime-chat/realtime-chat.tsx

@@ -20,6 +20,7 @@ import {
 } from "rt-client";
 import { AudioHandler } from "@/app/lib/audio";
 import { uploadImage } from "@/app/utils/chat";
+import { VoicePrint } from "@/app/components/voice-print";
 
 interface RealtimeChatProps {
   onClose?: () => void;
@@ -41,6 +42,7 @@ export function RealtimeChat({
   const [isConnecting, setIsConnecting] = useState(false);
   const [modality, setModality] = useState("audio");
   const [useVAD, setUseVAD] = useState(true);
+  const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();
 
   const clientRef = useRef<RTClient | null>(null);
   const audioHandlerRef = useRef<AudioHandler | null>(null);
@@ -272,29 +274,39 @@ export function RealtimeChat({
       console.error(error);
     });
 
-    // TODO demo to get frequency. will pass audioHandlerRef.current to child component draw.
-    // TODO try using requestAnimationFrame
-    const interval = setInterval(() => {
-      if (audioHandlerRef.current) {
-        const data = audioHandlerRef.current.getByteFrequencyData();
-        console.log("getByteFrequencyData", data);
-      }
-    }, 1000);
-
     return () => {
       if (isRecording) {
         toggleRecording();
       }
-      audioHandlerRef.current
-        ?.close()
-        .catch(console.error)
-        .finally(() => {
-          clearInterval(interval);
-        });
+      audioHandlerRef.current?.close().catch(console.error);
       disconnect();
     };
   }, []);
 
+  useEffect(() => { // While connected and recording, copy the audio handler's frequency data into state once per animation frame.
+    let animationFrameId: number;
+
+    if (isConnected && isRecording) {
+      const animationFrame = () => {
+        if (audioHandlerRef.current) {
+          const freqData = audioHandlerRef.current.getByteFrequencyData();
+          setFrequencies(freqData); // NOTE(review): if the handler returns the same array instance every call, React may skip the re-render — confirm
+        }
+        animationFrameId = requestAnimationFrame(animationFrame); // schedule the next poll
+      };
+
+      animationFrameId = requestAnimationFrame(animationFrame);
+    } else {
+      setFrequencies(undefined); // nothing to display when disconnected or not recording
+    }
+
+    return () => { // cleanup: runs when isConnected/isRecording change and on unmount
+      if (animationFrameId) { // never assigned when the polling loop was not started
+        cancelAnimationFrame(animationFrameId);
+      }
+    };
+  }, [isConnected, isRecording]);
+
   // update session params
   useEffect(() => {
     clientRef.current?.configure({ voice });
@@ -318,8 +330,9 @@ export function RealtimeChat({
           [styles["pulse"]]: isRecording,
         })}
       >
-        <div className={styles["icon-center"]}></div>
+        <VoicePrint frequencies={frequencies} isActive={isRecording} />
       </div>
+
       <div className={styles["bottom-icons"]}>
         <div>
           <IconButton

+ 1 - 0
app/components/voice-print/index.ts

@@ -0,0 +1 @@
+export * from "./voice-print";

+ 11 - 0
app/components/voice-print/voice-print.module.scss

@@ -0,0 +1,11 @@
+.voice-print {
+  width: 100%;
+  height: 60px;
+  margin: 20px 0;
+
+  canvas {
+    width: 100%;
+    height: 100%;
+    filter: brightness(1.2); // increase the overall brightness
+  }
+}

+ 125 - 0
app/components/voice-print/voice-print.tsx

@@ -0,0 +1,125 @@
+import { useEffect, useRef, useState } from "react";
+import styles from "./voice-print.module.scss";
+
+interface VoicePrintProps { // Props accepted by the VoicePrint canvas component.
+  frequencies?: Uint8Array; // per-bin values to draw (normalised by 255 when drawing); when absent the canvas is only cleared
+  isActive?: boolean; // when falsy the canvas is only cleared and the smoothing history is reset
+}
+
+export function VoicePrint({ frequencies, isActive }: VoicePrintProps) { // Draws a mirrored, gradient-filled waveform of `frequencies` on a canvas.
+  const canvasRef = useRef<HTMLCanvasElement>(null);
+  const [history, setHistory] = useState<number[][]>([]); // recent frames, averaged with the current frame when drawing
+  const historyLengthRef = useRef(10); // keep at most 10 frames of history
+
+  useEffect(() => {
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    // Size the canvas backing store to its layout size times the device pixel ratio.
+    const dpr = window.devicePixelRatio || 1;
+    canvas.width = canvas.offsetWidth * dpr;
+    canvas.height = canvas.offsetHeight * dpr;
+    ctx.scale(dpr, dpr); // NOTE(review): the coordinates below use canvas.width/height, which already include dpr — confirm this extra scale is intended
+
+    // Clear the canvas.
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
+
+    if (!frequencies || !isActive) {
+      setHistory([]); // reset the history. NOTE(review): a new array is set on every run while `history` is a dependency of this effect — verify this cannot re-trigger it repeatedly
+      return;
+    }
+
+    // Append the current frame to the history, dropping the oldest frame once the cap is exceeded.
+    const freqArray = Array.from(frequencies);
+    setHistory((prev) => {
+      const newHistory = [...prev, freqArray];
+      if (newHistory.length > historyLengthRef.current) {
+        newHistory.shift();
+      }
+      return newHistory;
+    });
+
+    // Draw the voice print.
+    const points: [number, number][] = [];
+    const centerY = canvas.height / 2;
+    const width = canvas.width;
+    const sliceWidth = width / (frequencies.length - 1); // NOTE(review): the denominator is 0 when frequencies has exactly one element
+
+    // Draw the main waveform.
+    ctx.beginPath();
+    ctx.moveTo(0, centerY);
+
+    // Average each bin with the history to smooth the result.
+    for (let i = 0; i < frequencies.length; i++) {
+      const x = i * sliceWidth;
+      let avgFrequency = frequencies[i];
+
+      // Mean of the current value and the stored historical values for this bin.
+      if (history.length > 0) {
+        const historicalValues = history.map((h) => h[i] || 0);
+        avgFrequency =
+          (avgFrequency + historicalValues.reduce((a, b) => a + b, 0)) /
+          (history.length + 1);
+      }
+
+      // Modulate the height with a sine term (phase depends on the bin index and the current time) so the waveform looks more natural.
+      const normalized = avgFrequency / 255.0;
+      const height = normalized * (canvas.height / 2);
+      const y = centerY + height * Math.sin(i * 0.2 + Date.now() * 0.002);
+
+      points.push([x, y]);
+
+      if (i === 0) {
+        ctx.moveTo(x, y);
+      } else {
+        // Use a quadratic Bezier curve to make the waveform smoother.
+        const prevPoint = points[i - 1];
+        const midX = (prevPoint[0] + x) / 2;
+        ctx.quadraticCurveTo(
+          prevPoint[0],
+          prevPoint[1],
+          midX,
+          (prevPoint[1] + y) / 2,
+        );
+      }
+    }
+
+    // Draw the symmetric half: the same points reflected about centerY, walked in reverse order.
+    for (let i = points.length - 1; i >= 0; i--) {
+      const [x, y] = points[i];
+      const symmetricY = centerY - (y - centerY);
+      if (i === points.length - 1) {
+        ctx.lineTo(x, symmetricY);
+      } else {
+        const nextPoint = points[i + 1];
+        const midX = (nextPoint[0] + x) / 2;
+        ctx.quadraticCurveTo(
+          nextPoint[0],
+          centerY - (nextPoint[1] - centerY),
+          midX,
+          centerY - ((nextPoint[1] + y) / 2 - centerY),
+        );
+      }
+    }
+
+    ctx.closePath();
+
+    // Set the gradient colours and opacity, then fill the closed shape.
+    const gradient = ctx.createLinearGradient(0, 0, canvas.width, 0);
+    gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
+    gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
+    gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");
+
+    ctx.fillStyle = gradient;
+    ctx.fill();
+  }, [frequencies, isActive, history]); // NOTE(review): this effect calls setHistory and also depends on `history` — verify it does not re-run in a loop
+
+  return (
+    <div className={styles["voice-print"]}>
+      <canvas ref={canvasRef} />
+    </div>
+  );
+}