DDMeaqua 1 year ago
parent commit 3ae8ec1af6

+ 0 - 11
app/client/api.ts

@@ -64,16 +64,6 @@ export interface SpeechOptions {
   onController?: (controller: AbortController) => void;
 }
 
-export interface TranscriptionOptions {
-  model?: "whisper-1";
-  file: Blob;
-  language?: string;
-  prompt?: string;
-  response_format?: "json" | "text" | "srt" | "verbose_json" | "vtt";
-  temperature?: number;
-  onController?: (controller: AbortController) => void;
-}
-
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -109,7 +99,6 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
   abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
-  abstract transcription(options: TranscriptionOptions): Promise<string>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }

+ 0 - 4
app/client/platforms/alibaba.ts

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -88,9 +87,6 @@ export class QwenApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 0 - 4
app/client/platforms/anthropic.ts

@@ -5,7 +5,6 @@ import {
   LLMApi,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import {
   useAccessStore,
@@ -90,9 +89,6 @@ export class ClaudeApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

+ 0 - 4
app/client/platforms/baidu.ts

@@ -15,7 +15,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -80,9 +79,6 @@ export class ErnieApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 0 - 4
app/client/platforms/bytedance.ts

@@ -14,7 +14,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -82,9 +81,6 @@ export class DoubaoApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 1 - 4
app/client/platforms/google.ts

@@ -6,7 +6,6 @@ import {
   LLMModel,
   LLMUsage,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
@@ -67,9 +66,7 @@ export class GeminiProApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
+
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;

+ 0 - 4
app/client/platforms/iflytek.ts

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -63,9 +62,6 @@ export class SparkApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

+ 0 - 4
app/client/platforms/moonshot.ts

@@ -27,7 +27,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,9 +76,6 @@ export class MoonshotApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

+ 0 - 42
app/client/platforms/openai.ts

@@ -34,7 +34,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -187,47 +186,6 @@ export class ChatGPTApi implements LLMApi {
     }
   }
 
-  async transcription(options: TranscriptionOptions): Promise<string> {
-    const formData = new FormData();
-    formData.append("file", options.file, "audio.wav");
-    formData.append("model", options.model ?? "whisper-1");
-    if (options.language) formData.append("language", options.language);
-    if (options.prompt) formData.append("prompt", options.prompt);
-    if (options.response_format)
-      formData.append("response_format", options.response_format);
-    if (options.temperature)
-      formData.append("temperature", options.temperature.toString());
-
-    console.log("[Request] openai audio transcriptions payload: ", options);
-
-    const controller = new AbortController();
-    options.onController?.(controller);
-
-    try {
-      const path = this.path(OpenaiPath.TranscriptionPath, options.model);
-      const headers = getHeaders(true);
-      const payload = {
-        method: "POST",
-        body: formData,
-        signal: controller.signal,
-        headers: headers,
-      };
-
-      // make a fetch request
-      const requestTimeoutId = setTimeout(
-        () => controller.abort(),
-        REQUEST_TIMEOUT_MS,
-      );
-      const res = await fetch(path, payload);
-      clearTimeout(requestTimeoutId);
-      const json = await res.json();
-      return json.text;
-    } catch (e) {
-      console.log("[Request] failed to make a audio transcriptions request", e);
-      throw e;
-    }
-  }
-
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,

+ 0 - 4
app/client/platforms/tencent.ts

@@ -9,7 +9,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -94,9 +93,6 @@ export class HunyuanApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);

+ 1 - 57
app/components/chat.tsx

@@ -10,7 +10,6 @@ import React, {
 } from "react";
 
 import SendWhiteIcon from "../icons/send-white.svg";
-import VoiceWhiteIcon from "../icons/voice-white.svg";
 import BrainIcon from "../icons/brain.svg";
 import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
@@ -83,7 +82,7 @@ import dynamic from "next/dynamic";
 import { ChatControllerPool } from "../client/controller";
 import { DalleSize, DalleQuality, DalleStyle } from "../typing";
 import { Prompt, usePromptStore } from "../store/prompt";
-import Locale, { getLang, getSTTLang } from "../locales";
+import Locale from "../locales";
 
 import { IconButton } from "./button";
 import styles from "./chat.module.scss";
@@ -100,9 +99,7 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
-  DEFAULT_STT_ENGINE,
   DEFAULT_TTS_ENGINE,
-  FIREFOX_DEFAULT_STT_ENGINE,
   ModelProvider,
   LAST_INPUT_KEY,
   Path,
@@ -123,11 +120,6 @@ import { MultimodalContent } from "../client/api";
 const localStorage = safeLocalStorage();
 import { ClientApi } from "../client/api";
 import { createTTSPlayer } from "../utils/audio";
-import {
-  OpenAITranscriptionApi,
-  SpeechApi,
-  WebTranscriptionApi,
-} from "../utils/speech";
 import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
 
 const ttsPlayer = createTTSPlayer();
@@ -556,44 +548,6 @@ export function ChatActions(props: {
     }
   }, [chatStore, currentModel, models]);
 
-  const [isListening, setIsListening] = useState(false);
-  const [isTranscription, setIsTranscription] = useState(false);
-  const [speechApi, setSpeechApi] = useState<any>(null);
-
-  useEffect(() => {
-    if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE;
-    setSpeechApi(
-      config.sttConfig.engine === DEFAULT_STT_ENGINE
-        ? new WebTranscriptionApi((transcription) =>
-            onRecognitionEnd(transcription),
-          )
-        : new OpenAITranscriptionApi((transcription) =>
-            onRecognitionEnd(transcription),
-          ),
-    );
-  }, []);
-
-  const startListening = async () => {
-    if (speechApi) {
-      await speechApi.start();
-      setIsListening(true);
-    }
-  };
-  const stopListening = async () => {
-    if (speechApi) {
-      if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
-        setIsTranscription(true);
-      await speechApi.stop();
-      setIsListening(false);
-    }
-  };
-  const onRecognitionEnd = (finalTranscript: string) => {
-    console.log(finalTranscript);
-    if (finalTranscript) props.setUserInput(finalTranscript);
-    if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
-      setIsTranscription(false);
-  };
-
   return (
     <div className={styles["chat-input-actions"]}>
       {couldStop && (
@@ -828,16 +782,6 @@ export function ChatActions(props: {
           icon={<ShortcutkeyIcon />}
         />
       )}
-
-      {config.sttConfig.enable && (
-        <ChatAction
-          onClick={async () =>
-            isListening ? await stopListening() : await startListening()
-          }
-          text={isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak}
-          icon={<VoiceWhiteIcon />}
-        />
-      )}
     </div>
   );
 }

+ 0 - 12
app/components/settings.tsx

@@ -81,7 +81,6 @@ import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
 import { TTSConfigList } from "./tts-config";
-import { STTConfigList } from "./stt-config";
 
 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();
@@ -1659,17 +1658,6 @@ export function Settings() {
           />
         </List>
 
-        <List>
-          <STTConfigList
-            sttConfig={config.sttConfig}
-            updateConfig={(updater) => {
-              const sttConfig = { ...config.sttConfig };
-              updater(sttConfig);
-              config.update((config) => (config.sttConfig = sttConfig));
-            }}
-          />
-        </List>
-
         <DangerItems />
       </div>
     </ErrorBoundary>

+ 0 - 51
app/components/stt-config.tsx

@@ -1,51 +0,0 @@
-import { STTConfig, STTConfigValidator } from "../store";
-
-import Locale from "../locales";
-import { ListItem, Select } from "./ui-lib";
-import { DEFAULT_STT_ENGINES } from "../constant";
-import { isFirefox } from "../utils";
-
-export function STTConfigList(props: {
-  sttConfig: STTConfig;
-  updateConfig: (updater: (config: STTConfig) => void) => void;
-}) {
-  return (
-    <>
-      <ListItem
-        title={Locale.Settings.STT.Enable.Title}
-        subTitle={Locale.Settings.STT.Enable.SubTitle}
-      >
-        <input
-          type="checkbox"
-          checked={props.sttConfig.enable}
-          onChange={(e) =>
-            props.updateConfig(
-              (config) => (config.enable = e.currentTarget.checked),
-            )
-          }
-        ></input>
-      </ListItem>
-      {!isFirefox() && (
-        <ListItem title={Locale.Settings.STT.Engine.Title}>
-          <Select
-            value={props.sttConfig.engine}
-            onChange={(e) => {
-              props.updateConfig(
-                (config) =>
-                  (config.engine = STTConfigValidator.engine(
-                    e.currentTarget.value,
-                  )),
-              );
-            }}
-          >
-            {DEFAULT_STT_ENGINES.map((v, i) => (
-              <option value={v} key={i}>
-                {v}
-              </option>
-            ))}
-          </Select>
-        </ListItem>
-      )}
-    </>
-  );
-}

+ 0 - 119
app/components/stt.module.scss

@@ -1,119 +0,0 @@
-@import "../styles/animation.scss";
-.plugin-page {
-  height: 100%;
-  display: flex;
-  flex-direction: column;
-
-  .plugin-page-body {
-    padding: 20px;
-    overflow-y: auto;
-
-    .plugin-filter {
-      width: 100%;
-      max-width: 100%;
-      margin-bottom: 20px;
-      animation: slide-in ease 0.3s;
-      height: 40px;
-
-      display: flex;
-
-      .search-bar {
-        flex-grow: 1;
-        max-width: 100%;
-        min-width: 0;
-        outline: none;
-      }
-
-      .search-bar:focus {
-        border: 1px solid var(--primary);
-      }
-
-      .plugin-filter-lang {
-        height: 100%;
-        margin-left: 10px;
-      }
-
-      .plugin-create {
-        height: 100%;
-        margin-left: 10px;
-        box-sizing: border-box;
-        min-width: 80px;
-      }
-    }
-
-    .plugin-item {
-      display: flex;
-      justify-content: space-between;
-      padding: 20px;
-      border: var(--border-in-light);
-      animation: slide-in ease 0.3s;
-
-      &:not(:last-child) {
-        border-bottom: 0;
-      }
-
-      &:first-child {
-        border-top-left-radius: 10px;
-        border-top-right-radius: 10px;
-      }
-
-      &:last-child {
-        border-bottom-left-radius: 10px;
-        border-bottom-right-radius: 10px;
-      }
-
-      .plugin-header {
-        display: flex;
-        align-items: center;
-
-        .plugin-icon {
-          display: flex;
-          align-items: center;
-          justify-content: center;
-          margin-right: 10px;
-        }
-
-        .plugin-title {
-          .plugin-name {
-            font-size: 14px;
-            font-weight: bold;
-          }
-          .plugin-info {
-            font-size: 12px;
-          }
-          .plugin-runtime-warning {
-            font-size: 12px;
-            color: #f86c6c;
-          }
-        }
-      }
-
-      .plugin-actions {
-        display: flex;
-        flex-wrap: nowrap;
-        transition: all ease 0.3s;
-        justify-content: center;
-        align-items: center;
-      }
-
-      @media screen and (max-width: 600px) {
-        display: flex;
-        flex-direction: column;
-        padding-bottom: 10px;
-        border-radius: 10px;
-        margin-bottom: 20px;
-        box-shadow: var(--card-shadow);
-
-        &:not(:last-child) {
-          border-bottom: var(--border-in-light);
-        }
-
-        .plugin-actions {
-          width: 100%;
-          justify-content: space-between;
-          padding-top: 10px;
-        }
-      }
-    }
-  }
-}

+ 0 - 5
app/constant.ts

@@ -153,7 +153,6 @@ export const Anthropic = {
 export const OpenaiPath = {
   ChatPath: "v1/chat/completions",
   SpeechPath: "v1/audio/speech",
-  TranscriptionPath: "v1/audio/transcriptions",
   ImagePath: "v1/images/generations",
   UsagePath: "dashboard/billing/usage",
   SubsPath: "dashboard/billing/subscription",
@@ -274,10 +273,6 @@ export const DEFAULT_TTS_VOICES = [
   "shimmer",
 ];
 
-export const DEFAULT_STT_ENGINE = "WebAPI";
-export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"];
-export const FIREFOX_DEFAULT_STT_ENGINE = "OpenAI Whisper";
-
 const openaiModels = [
   "gpt-3.5-turbo",
   "gpt-3.5-turbo-1106",

+ 0 - 10
app/locales/cn.ts

@@ -520,16 +520,6 @@ const cn = {
         SubTitle: "生成语音的速度",
       },
     },
-    STT: {
-      Enable: {
-        Title: "启用语音转文本",
-        SubTitle: "启用语音转文本",
-      },
-      Engine: {
-        Title: "转换引擎",
-        SubTitle: "音频转换引擎",
-      },
-    },
   },
   Store: {
     DefaultTopic: "新的聊天",

+ 0 - 10
app/locales/en.ts

@@ -527,16 +527,6 @@ const en: LocaleType = {
       },
       Engine: "TTS Engine",
     },
-    STT: {
-      Enable: {
-        Title: "Enable STT",
-        SubTitle: "Enable Speech-to-Text",
-      },
-      Engine: {
-        Title: "STT Engine",
-        SubTitle: "Text-to-Speech Engine",
-      },
-    },
   },
   Store: {
     DefaultTopic: "New Conversation",

+ 0 - 15
app/store/config.ts

@@ -5,8 +5,6 @@ import {
   DEFAULT_INPUT_TEMPLATE,
   DEFAULT_MODELS,
   DEFAULT_SIDEBAR_WIDTH,
-  DEFAULT_STT_ENGINE,
-  DEFAULT_STT_ENGINES,
   DEFAULT_TTS_ENGINE,
   DEFAULT_TTS_ENGINES,
   DEFAULT_TTS_MODEL,
@@ -23,8 +21,6 @@ export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
 export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
 export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];
 
-export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number];
-
 export enum SubmitKey {
   Enter = "Enter",
   CtrlEnter = "Ctrl + Enter",
@@ -90,17 +86,12 @@ export const DEFAULT_CONFIG = {
     voice: DEFAULT_TTS_VOICE,
     speed: 1.0,
   },
-  sttConfig: {
-    enable: false,
-    engine: DEFAULT_STT_ENGINE,
-  },
 };
 
 export type ChatConfig = typeof DEFAULT_CONFIG;
 
 export type ModelConfig = ChatConfig["modelConfig"];
 export type TTSConfig = ChatConfig["ttsConfig"];
-export type STTConfig = ChatConfig["sttConfig"];
 
 export function limitNumber(
   x: number,
@@ -130,12 +121,6 @@ export const TTSConfigValidator = {
   },
 };
 
-export const STTConfigValidator = {
-  engine(x: string) {
-    return x as STTEngineType;
-  },
-};
-
 export const ModalConfigValidator = {
   model(x: string) {
     return x as ModelType;

+ 0 - 126
app/utils/speech.ts

@@ -1,126 +0,0 @@
-import { ChatGPTApi } from "../client/platforms/openai";
-import { getSTTLang } from "../locales";
-import { isFirefox } from "../utils";
-
-export type TranscriptionCallback = (transcription: string) => void;
-
-export abstract class SpeechApi {
-  protected onTranscription: TranscriptionCallback = () => {};
-
-  abstract isListening(): boolean;
-  abstract start(): Promise<void>;
-  abstract stop(): Promise<void>;
-
-  onTranscriptionReceived(callback: TranscriptionCallback) {
-    this.onTranscription = callback;
-  }
-}
-
-export class OpenAITranscriptionApi extends SpeechApi {
-  private listeningStatus = false;
-  private mediaRecorder: MediaRecorder | null = null;
-  private stream: MediaStream | null = null;
-  private audioChunks: Blob[] = [];
-
-  isListening = () => this.listeningStatus;
-
-  constructor(transcriptionCallback?: TranscriptionCallback) {
-    super();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
-  }
-
-  async start(): Promise<void> {
-    // @ts-ignore
-    navigator.getUserMedia =
-      // @ts-ignore
-      navigator.getUserMedia ||
-      // @ts-ignore
-      navigator.webkitGetUserMedia ||
-      // @ts-ignore
-      navigator.mozGetUserMedia ||
-      // @ts-ignore
-      navigator.msGetUserMedia;
-    if (navigator.mediaDevices) {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      this.mediaRecorder = new MediaRecorder(stream);
-      this.mediaRecorder.ondataavailable = (e) => {
-        if (e.data && e.data.size > 0) {
-          this.audioChunks.push(e.data);
-        }
-      };
-
-      this.stream = stream;
-    } else {
-      console.warn("Media Decives will work only with SSL");
-      return;
-    }
-
-    this.audioChunks = [];
-
-    // this.recorder.addEventListener("dataavailable", (event) => {
-    //     this.audioChunks.push(event.data);
-    // });
-
-    this.mediaRecorder.start(1000);
-    this.listeningStatus = true;
-  }
-
-  async stop(): Promise<void> {
-    if (!this.mediaRecorder || !this.listeningStatus) {
-      return;
-    }
-
-    return new Promise((resolve) => {
-      this.mediaRecorder!.addEventListener("stop", async () => {
-        const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" });
-        const llm = new ChatGPTApi();
-        const transcription = await llm.transcription({ file: audioBlob });
-        this.onTranscription(transcription);
-        this.listeningStatus = false;
-        resolve();
-      });
-
-      this.mediaRecorder!.stop();
-    });
-  }
-}
-
-export class WebTranscriptionApi extends SpeechApi {
-  private listeningStatus = false;
-  private recognitionInstance: any | null = null;
-
-  isListening = () => this.listeningStatus;
-
-  constructor(transcriptionCallback?: TranscriptionCallback) {
-    super();
-    if (isFirefox()) return;
-    const SpeechRecognition =
-      (window as any).SpeechRecognition ||
-      (window as any).webkitSpeechRecognition;
-    this.recognitionInstance = new SpeechRecognition();
-    this.recognitionInstance.continuous = true;
-    this.recognitionInstance.interimResults = true;
-    this.recognitionInstance.lang = getSTTLang();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
-    this.recognitionInstance.onresult = (event: any) => {
-      const result = event.results[event.results.length - 1];
-      if (result.isFinal) {
-        this.onTranscription(result[0].transcript);
-      }
-    };
-  }
-
-  async start(): Promise<void> {
-    this.listeningStatus = true;
-    await this.recognitionInstance.start();
-  }
-
-  async stop(): Promise<void> {
-    this.listeningStatus = false;
-    await this.recognitionInstance.stop();
-  }
-}