|
|
@@ -0,0 +1,359 @@
|
|
|
+import VoiceIcon from "@/app/icons/voice.svg";
|
|
|
+import VoiceOffIcon from "@/app/icons/voice-off.svg";
|
|
|
+import PowerIcon from "@/app/icons/power.svg";
|
|
|
+
|
|
|
+import styles from "./realtime-chat.module.scss";
|
|
|
+import clsx from "clsx";
|
|
|
+
|
|
|
+import { useState, useRef, useEffect } from "react";
|
|
|
+
|
|
|
+import { useChatStore, createMessage, useAppConfig } from "@/app/store";
|
|
|
+
|
|
|
+import { IconButton } from "@/app/components/button";
|
|
|
+
|
|
|
+import {
|
|
|
+ Modality,
|
|
|
+ RTClient,
|
|
|
+ RTInputAudioItem,
|
|
|
+ RTResponse,
|
|
|
+ TurnDetection,
|
|
|
+} from "rt-client";
|
|
|
+import { AudioHandler } from "@/app/lib/audio";
|
|
|
+import { uploadImage } from "@/app/utils/chat";
|
|
|
+import { VoicePrint } from "@/app/components/voice-print";
|
|
|
+
|
|
|
/**
 * Props accepted by {@link RealtimeChat}. Every callback is optional.
 */
interface RealtimeChatProps {
  /**
   * Called when the user presses the power button, before the component
   * stops recording and closes the realtime connection.
   */
  onClose?: () => void;
  /**
   * Accepted by the component but not invoked anywhere in its current body.
   * NOTE(review): presumably meant to signal that voice capture started — confirm.
   */
  onStartVoice?: () => void;
  /**
   * Accepted by the component but not invoked anywhere in its current body.
   * NOTE(review): presumably meant to signal that voice capture paused — confirm.
   */
  onPausedVoice?: () => void;
}
|
|
|
+
|
|
|
/**
 * Realtime voice chat panel.
 *
 * On mount it creates an `AudioHandler`, opens an `RTClient` session using
 * the realtime settings from the app config, and starts streaming microphone
 * audio to it. Assistant responses and user transcriptions are appended to the
 * current chat session; recorded / played audio is uploaded and its URL is
 * attached to the corresponding message.
 *
 * @param onClose       Invoked first when the power button is pressed.
 * @param onStartVoice  Accepted for API compatibility; not invoked here.
 * @param onPausedVoice Accepted for API compatibility; not invoked here.
 */
export function RealtimeChat({
  onClose,
  onStartVoice,
  onPausedVoice,
}: RealtimeChatProps) {
  const chatStore = useChatStore();
  const session = chatStore.currentSession();
  const config = useAppConfig();
  const [status, setStatus] = useState("");
  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  // No UI changes these yet, so only the values are kept.
  const [modality] = useState("audio");
  const [useVAD] = useState(true);
  const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();

  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);
  const initRef = useRef(false);
  // Mirrors `isRecording` so long-lived closures (the mount effect and its
  // cleanup) can read the current value instead of the first render's value.
  const isRecordingRef = useRef(false);

  const temperature = config.realtimeConfig.temperature;
  const apiKey = config.realtimeConfig.apiKey;
  const model = config.realtimeConfig.model;
  const azure = config.realtimeConfig.provider === "Azure";
  const azureEndpoint = config.realtimeConfig.azure.endpoint;
  const azureDeployment = config.realtimeConfig.azure.deployment;
  const voice = config.realtimeConfig.voice;

  /** Updates the recording flag in both the ref and the rendered state. */
  const markRecording = (recording: boolean) => {
    isRecordingRef.current = recording;
    setIsRecording(recording);
  };

  /**
   * Replaces the session's message array with a shallow copy.
   * The message objects are mutated in place by the handlers below;
   * presumably the fresh array is what lets store subscribers notice.
   */
  const refreshMessages = () => {
    chatStore.updateTargetSession(session, (target) => {
      target.messages = target.messages.concat();
    });
  };

  /**
   * Connects when disconnected, disconnects when connected.
   * Does nothing while a connection attempt is already in flight.
   */
  const handleConnect = async () => {
    if (isConnecting) return;
    if (isConnected) {
      await disconnect();
      return;
    }

    setIsConnecting(true);
    try {
      const client = azure
        ? new RTClient(
            new URL(azureEndpoint),
            { key: apiKey },
            { deployment: azureDeployment },
          )
        : new RTClient({ key: apiKey }, { model });
      clientRef.current = client;

      const modalities: Modality[] =
        modality === "audio" ? ["text", "audio"] : ["text"];
      const turnDetection: TurnDetection = useVAD
        ? { type: "server_vad" }
        : null;
      await client.configure({
        instructions: "",
        voice,
        input_audio_transcription: { model: "whisper-1" },
        turn_detection: turnDetection,
        tools: [],
        temperature,
        modalities,
      });
      // Runs for the lifetime of the connection; it handles its own errors.
      void startResponseListener();

      setIsConnected(true);
      // TODO
      // try {
      //   const recentMessages = chatStore.getMessagesWithMemory();
      //   for (const message of recentMessages) {
      //     const { role, content } = message;
      //     if (typeof content === "string") {
      //       await clientRef.current.sendItem({
      //         type: "message",
      //         role: role as any,
      //         content: [
      //           {
      //             type: (role === "assistant" ? "text" : "input_text") as any,
      //             text: content as string,
      //           },
      //         ],
      //       });
      //     }
      //   }
      //   // await clientRef.current.generateResponse();
      // } catch (error) {
      //   console.error("Set message failed:", error);
      // }
    } catch (error) {
      console.error("Connection failed:", error);
      setStatus("Connection failed");
      // Drop the half-open client so later code (e.g. toggleRecording) does
      // not stream audio to a session that never finished configuring.
      const failedClient = clientRef.current;
      clientRef.current = null;
      if (failedClient) {
        try {
          await failedClient.close();
        } catch (closeError) {
          console.error("Disconnect failed:", closeError);
        }
      }
    } finally {
      setIsConnecting(false);
    }
  };

  /** Closes the realtime client, if any, and resets the connection state. */
  const disconnect = async () => {
    const client = clientRef.current;
    if (!client) return;
    // Clear the ref first: the recording callback stops sending audio, and the
    // event listener treats errors raised during shutdown as expected.
    clientRef.current = null;
    try {
      await client.close();
    } catch (error) {
      console.error("Disconnect failed:", error);
    } finally {
      setIsConnected(false);
    }
  };

  /**
   * Consumes server events until the stream ends, dispatching assistant
   * responses and input-audio items to their handlers.
   */
  const startResponseListener = async () => {
    const client = clientRef.current;
    if (!client) return;

    try {
      for await (const serverEvent of client.events()) {
        if (serverEvent.type === "response") {
          await handleResponse(serverEvent);
        } else if (serverEvent.type === "input_audio") {
          await handleInputAudio(serverEvent);
        }
      }
    } catch (error) {
      // Only report when the client is still attached, i.e. the failure was
      // not caused by an intentional disconnect.
      if (clientRef.current) {
        console.error("Response iteration error:", error);
      }
    }
  };

  /**
   * Streams one server response into the chat session: text and audio
   * transcript chunks are appended to a new assistant message, audio chunks
   * are played as they arrive, and the played audio is uploaded afterwards.
   */
  const handleResponse = async (response: RTResponse) => {
    for await (const item of response) {
      if (item.type === "message" && item.role === "assistant") {
        const botMessage = createMessage({
          role: item.role,
          content: "",
        });
        // Add the (still empty) bot message first so it shows up immediately.
        chatStore.updateTargetSession(session, (target) => {
          target.messages = target.messages.concat([botMessage]);
        });
        let hasAudio = false;
        for await (const content of item) {
          if (content.type === "text") {
            for await (const text of content.textChunks()) {
              botMessage.content += text;
            }
          } else if (content.type === "audio") {
            const textTask = async () => {
              for await (const text of content.transcriptChunks()) {
                botMessage.content += text;
              }
            };
            const audioTask = async () => {
              audioHandlerRef.current?.startStreamingPlayback();
              for await (const audio of content.audioChunks()) {
                hasAudio = true;
                audioHandlerRef.current?.playChunk(audio);
              }
            };
            // Transcript and audio arrive on separate streams; drain both.
            await Promise.all([textTask(), audioTask()]);
          }
          // Publish the accumulated message content.
          refreshMessages();
        }
        if (hasAudio) {
          // Upload the played audio and attach its URL to the message.
          const blob = audioHandlerRef.current?.savePlayFile();
          if (blob) {
            uploadImage(blob)
              .then((audio_url) => {
                botMessage.audio_url = audio_url;
                refreshMessages();
              })
              .catch((error) => {
                console.error("Failed to upload response audio:", error);
              });
          }
        }
      }
    }
  };

  /**
   * Waits for an input-audio item to complete, records its transcription as a
   * user message, uploads the matching slice of the recording, and finally
   * stops any assistant playback that is still streaming.
   */
  const handleInputAudio = async (item: RTInputAudioItem) => {
    await item.waitForCompletion();
    if (item.transcription) {
      const userMessage = createMessage({
        role: "user",
        content: item.transcription,
      });
      chatStore.updateTargetSession(session, (target) => {
        target.messages = target.messages.concat([userMessage]);
      });
      // Upload the recorded slice and attach its URL to the user message.
      const { audioStartMillis, audioEndMillis } = item;
      const blob = audioHandlerRef.current?.saveRecordFile(
        audioStartMillis,
        audioEndMillis,
      );
      if (blob) {
        uploadImage(blob)
          .then((audio_url) => {
            userMessage.audio_url = audio_url;
            refreshMessages();
          })
          .catch((error) => {
            console.error("Failed to upload input audio:", error);
          });
      }
    }
    // Stop streaming playback once input audio has been received.
    audioHandlerRef.current?.stopStreamingPlayback();
  };

  /**
   * Starts microphone streaming when idle (requires a client), or stops it
   * when recording. Without server VAD, stopping also commits the buffered
   * audio and asks the server for a response.
   */
  const toggleRecording = async () => {
    if (isRecordingRef.current) {
      const handler = audioHandlerRef.current;
      if (!handler) {
        markRecording(false);
        return;
      }
      try {
        handler.stopRecording();
        // The recorder is stopped now; reflect that even if the commit below
        // fails, so the UI cannot get stuck in the recording state.
        markRecording(false);
        const client = clientRef.current;
        if (!useVAD && client) {
          const inputAudio = await client.commitAudio();
          await handleInputAudio(inputAudio);
          await client.generateResponse();
        }
      } catch (error) {
        console.error("Failed to stop recording:", error);
      }
      return;
    }

    // Not recording: only start when there is a client to stream to.
    if (!clientRef.current) return;
    try {
      if (!audioHandlerRef.current) {
        audioHandlerRef.current = new AudioHandler();
        await audioHandlerRef.current.initialize();
      }
      await audioHandlerRef.current.startRecording(async (chunk) => {
        await clientRef.current?.sendAudio(chunk);
      });
      markRecording(true);
    } catch (error) {
      console.error("Failed to start recording:", error);
    }
  };

  useEffect(() => {
    // Prevent duplicate initialization.
    if (initRef.current) return;
    initRef.current = true;

    const initAudioHandler = async () => {
      const handler = new AudioHandler();
      await handler.initialize();
      audioHandlerRef.current = handler;
      await handleConnect();
      await toggleRecording();
    };

    initAudioHandler().catch((error: unknown) => {
      // `status` is rendered as a JSX child, so it must be a string and never
      // the raw Error object.
      setStatus(error instanceof Error ? error.message : String(error));
      console.error(error);
    });

    return () => {
      // This closure belongs to the first render, so state captured here is
      // stale; the ref holds the live recording flag.
      if (isRecordingRef.current) {
        try {
          audioHandlerRef.current?.stopRecording();
        } catch (error) {
          console.error("Failed to stop recording:", error);
        }
        isRecordingRef.current = false;
      }
      audioHandlerRef.current?.close().catch(console.error);
      void disconnect();
    };
  }, []);

  // Poll the analyser once per animation frame while actively recording.
  useEffect(() => {
    let animationFrameId: number | undefined;

    if (isConnected && isRecording) {
      const animationFrame = () => {
        if (audioHandlerRef.current) {
          const freqData = audioHandlerRef.current.getByteFrequencyData();
          setFrequencies(freqData);
        }
        animationFrameId = requestAnimationFrame(animationFrame);
      };

      animationFrameId = requestAnimationFrame(animationFrame);
    } else {
      setFrequencies(undefined);
    }

    return () => {
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
      }
    };
  }, [isConnected, isRecording]);

  // Push updated session parameters to an already-open client.
  useEffect(() => {
    clientRef.current?.configure({ voice }).catch(console.error);
  }, [voice]);
  useEffect(() => {
    clientRef.current?.configure({ temperature }).catch(console.error);
  }, [temperature]);

  /** Power button: notify the parent, stop recording, then disconnect. */
  const handleClose = async () => {
    onClose?.();
    if (isRecordingRef.current) {
      await toggleRecording();
    }
    disconnect().catch(console.error);
  };

  return (
    <div className={styles["realtime-chat"]}>
      <div
        className={clsx(styles["circle-mic"], {
          [styles["pulse"]]: isRecording,
        })}
      >
        <VoicePrint frequencies={frequencies} isActive={isRecording} />
      </div>

      <div className={styles["bottom-icons"]}>
        <div>
          <IconButton
            icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
            onClick={toggleRecording}
            disabled={!isConnected}
            shadow
            bordered
          />
        </div>
        <div className={styles["icon-center"]}>{status}</div>
        <div>
          <IconButton
            icon={<PowerIcon />}
            onClick={handleClose}
            shadow
            bordered
          />
        </div>
      </div>
    </div>
  );
}
|