| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- import VoiceIcon from "@/app/icons/voice.svg";
- import VoiceOffIcon from "@/app/icons/voice-off.svg";
- import Close24Icon from "@/app/icons/close-24.svg";
- import PowerIcon from "@/app/icons/power.svg";
- import styles from "./realtime-chat.module.scss";
- import clsx from "clsx";
- import { useState, useRef, useEffect } from "react";
- import {
- useAccessStore,
- useChatStore,
- ChatMessage,
- createMessage,
- } from "@/app/store";
- import { IconButton } from "@/app/components/button";
- import {
- Modality,
- RTClient,
- RTInputAudioItem,
- RTResponse,
- TurnDetection,
- } from "rt-client";
- import { AudioHandler } from "@/app/lib/audio";
- import { uploadImage } from "@/app/utils/chat";
/**
 * Props accepted by the `RealtimeChat` component.
 *
 * All callbacks are optional.
 */
interface RealtimeChatProps {
  /** Called when the user presses the close button of the panel. */
  onClose?: () => void;
  /**
   * Voice-start notification hook.
   * NOTE(review): the visible component never invokes this — confirm intent.
   */
  onStartVoice?: () => void;
  /**
   * Voice-pause notification hook.
   * NOTE(review): the visible component never invokes this — confirm intent.
   */
  onPausedVoice?: () => void;
}
/**
 * Voice chat panel: streams microphone audio to a realtime model through
 * `RTClient`, plays the streamed audio answer, and appends the resulting
 * user / assistant messages to the current chat session.
 *
 * @param props.onClose - invoked once when the close button is pressed.
 */
export function RealtimeChat({ onClose }: RealtimeChatProps) {
  // Non-reactive snapshot of the access store; only the API key is read.
  const accessStore = useAccessStore.getState();
  const chatStore = useChatStore();
  const session = chatStore.currentSession();

  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  // Connection options. Nothing in this component changes them, so only the
  // current values are kept.
  const [modality] = useState("audio");
  const [isAzure] = useState(false);
  const [endpoint] = useState("");
  const [deployment] = useState("");
  const [useVAD] = useState(true);

  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);
  const apiKey = accessStore.openaiApiKey;

  /**
   * Re-assigns the session's message array with a shallow copy. Called after a
   * message object has been mutated in place (text, date or audio_url).
   */
  const refreshMessages = () => {
    chatStore.updateTargetSession(session, (target) => {
      target.messages = target.messages.concat();
    });
  };

  /**
   * Closes the active client, if any. The ref and the connected flag are reset
   * even when `close()` rejects, so the UI can never get stuck on a dead client.
   */
  const disconnect = async () => {
    const client = clientRef.current;
    if (!client) return;
    // Clear the ref first so the event listener can tell an intentional
    // shutdown apart from an unexpected stream failure.
    clientRef.current = null;
    try {
      await client.close();
    } catch (error) {
      console.error("Disconnect failed:", error);
    } finally {
      setIsConnected(false);
    }
  };

  /**
   * Saves the played audio of a finished response, uploads it and stores the
   * resulting URL on the message. Failures are logged, never thrown, because
   * the caller runs this without awaiting it.
   */
  const saveResponseAudio = async (botMessage: ChatMessage) => {
    const blob = audioHandlerRef.current?.savePlayFile();
    if (!blob) return;
    try {
      botMessage.audio_url = await uploadImage(blob);
      refreshMessages();
    } catch (error) {
      console.error("Failed to upload response audio:", error);
    }
  };

  /**
   * Consumes one server response: for every assistant message item it appends
   * an empty bot message to the session, fills it from the streamed text /
   * transcript chunks, and plays streamed audio chunks as they arrive.
   */
  const handleResponse = async (response: RTResponse) => {
    for await (const item of response) {
      console.log("handleResponse", item);
      if (item.type !== "message" || item.role !== "assistant") continue;

      const botMessage = createMessage({
        role: item.role,
        content: "",
      });
      // Add the (still empty) bot message first, then fill it in place.
      chatStore.updateTargetSession(session, (target) => {
        target.messages = target.messages.concat([botMessage]);
      });

      let hasAudio = false;
      for await (const content of item) {
        if (content.type === "text") {
          for await (const text of content.textChunks()) {
            botMessage.content += text;
          }
        } else if (content.type === "audio") {
          const textTask = async () => {
            for await (const text of content.transcriptChunks()) {
              botMessage.content += text;
            }
          };
          const audioTask = async () => {
            audioHandlerRef.current?.startStreamingPlayback();
            for await (const audio of content.audioChunks()) {
              hasAudio = true;
              audioHandlerRef.current?.playChunk(audio);
            }
          };
          // Transcript and audio are separate streams; drain them together.
          await Promise.all([textTask(), audioTask()]);
        }
        // Publish the text gathered so far for this content part.
        refreshMessages();
      }

      botMessage.date = new Date().toLocaleString();
      refreshMessages();

      if (hasAudio) {
        // Not awaited on purpose: the upload must not delay the next item.
        void saveResponseAudio(botMessage);
      }
    }
  };

  /**
   * Handles a finished piece of user audio: stops any ongoing playback, waits
   * for the item to complete and appends its transcription as a user message.
   */
  const handleInputAudio = async (item: RTInputAudioItem) => {
    audioHandlerRef.current?.stopStreamingPlayback();
    await item.waitForCompletion();
    const { audioStartMillis, audioEndMillis } = item;
    // TODO, save input audio_url, and update session
    console.log("handleInputAudio", item, audioStartMillis, audioEndMillis);
    const userMessage = createMessage({
      role: "user",
      // The transcription may be missing; never store `undefined` as content.
      content: item.transcription ?? "",
    });
    chatStore.updateTargetSession(session, (target) => {
      target.messages = target.messages.concat([userMessage]);
    });
  };

  /**
   * Iterates the client's server events until the stream ends, dispatching
   * "response" and "input_audio" events to their handlers.
   */
  const startResponseListener = async () => {
    const client = clientRef.current;
    if (!client) return;
    try {
      for await (const serverEvent of client.events()) {
        if (serverEvent.type === "response") {
          await handleResponse(serverEvent);
        } else if (serverEvent.type === "input_audio") {
          await handleInputAudio(serverEvent);
        }
      }
    } catch (error) {
      // Only report failures of the client that is still the active one;
      // errors raised after a disconnect are expected.
      if (clientRef.current === client) {
        console.error("Response iteration error:", error);
      }
    }
  };

  /**
   * Power-button handler: connects and configures a new client when
   * disconnected, otherwise tears the current connection down.
   */
  const handleConnect = async () => {
    if (isConnecting) return;

    if (isConnected) {
      if (isRecording) {
        // Do not leave the recorder running against a closed client.
        try {
          audioHandlerRef.current?.stopRecording();
        } catch (error) {
          console.error("Failed to stop recording:", error);
        }
        setIsRecording(false);
      }
      await disconnect();
      return;
    }

    setIsConnecting(true);
    try {
      const client = isAzure
        ? new RTClient(new URL(endpoint), { key: apiKey }, { deployment })
        : new RTClient(
            { key: apiKey },
            { model: "gpt-4o-realtime-preview-2024-10-01" },
          );
      clientRef.current = client;

      const modalities: Modality[] =
        modality === "audio" ? ["text", "audio"] : ["text"];
      const turnDetection: TurnDetection = useVAD
        ? { type: "server_vad" }
        : null;

      // Awaited so that a rejected configuration is caught below and the UI
      // only reports "connected" once the session is actually configured.
      await client.configure({
        // TODO: send the real conversation context instead of a placeholder.
        instructions: "Hi",
        input_audio_transcription: { model: "whisper-1" },
        turn_detection: turnDetection,
        tools: [],
        temperature: 0.9,
        modalities,
      });

      // Long-running loop; it handles its own errors, so it is not awaited.
      void startResponseListener();
      setIsConnected(true);
    } catch (error) {
      console.error("Connection failed:", error);
      // Drop the half-initialised client instead of keeping it in the ref.
      await disconnect();
    } finally {
      setIsConnecting(false);
    }
  };

  /**
   * Microphone-button handler: starts streaming recorded chunks to the client,
   * or stops recording. Without server-side voice detection the buffered audio
   * is committed manually and a response is requested.
   */
  const toggleRecording = async () => {
    if (!isRecording) {
      if (!clientRef.current) return;
      try {
        if (!audioHandlerRef.current) {
          const handler = new AudioHandler();
          await handler.initialize();
          audioHandlerRef.current = handler;
        }
        await audioHandlerRef.current.startRecording(async (chunk) => {
          await clientRef.current?.sendAudio(chunk);
        });
        setIsRecording(true);
      } catch (error) {
        console.error("Failed to start recording:", error);
      }
      return;
    }

    try {
      audioHandlerRef.current?.stopRecording();
      // Recording has stopped; reflect that even if the commit below fails.
      setIsRecording(false);
      const client = clientRef.current;
      if (!useVAD && client) {
        const inputAudio = await client.commitAudio();
        await handleInputAudio(inputAudio);
        await client.generateResponse();
      }
    } catch (error) {
      console.error("Failed to stop recording:", error);
    }
  };

  // Create the audio handler on mount; release it and the client on unmount.
  useEffect(() => {
    let unmounted = false;
    const initAudioHandler = async () => {
      const handler = new AudioHandler();
      await handler.initialize();
      if (unmounted) {
        // The component went away while initialising: release immediately.
        await handler.close();
        return;
      }
      audioHandlerRef.current = handler;
    };
    initAudioHandler().catch(console.error);
    return () => {
      unmounted = true;
      void disconnect();
      audioHandlerRef.current?.close().catch(console.error);
      audioHandlerRef.current = null;
    };
  }, []);

  /** Close-button handler: notifies the parent, then drops the connection. */
  const handleClose = () => {
    onClose?.();
    void disconnect();
  };

  return (
    <div className={styles["realtime-chat"]}>
      <div
        className={clsx(styles["circle-mic"], {
          [styles["pulse"]]: true,
        })}
      >
        <div className={styles["icon-center"]}></div>
      </div>
      <div className={styles["bottom-icons"]}>
        <div>
          <IconButton
            icon={isRecording ? <VoiceOffIcon /> : <VoiceIcon />}
            onClick={toggleRecording}
            disabled={!isConnected}
            bordered
            shadow
          />
        </div>
        <div className={styles["icon-center"]}>
          <IconButton
            icon={<PowerIcon />}
            text={
              isConnecting
                ? "Connecting..."
                : isConnected
                  ? "Disconnect"
                  : "Connect"
            }
            onClick={handleConnect}
            disabled={isConnecting}
            bordered
            shadow
          />
        </div>
        {/* The handler lives on the button only: having it on the wrapper as
            well made every click run handleClose twice via event bubbling. */}
        <div>
          <IconButton
            icon={<Close24Icon />}
            onClick={handleClose}
            bordered
            shadow
          />
        </div>
      </div>
    </div>
  );
}
|