DDMeaqua 1 year ago
parent commit 3ae8ec1af6

+ 0 - 11
app/client/api.ts

@@ -64,16 +64,6 @@ export interface SpeechOptions {
   onController?: (controller: AbortController) => void;
 }
 
-export interface TranscriptionOptions {
-  model?: "whisper-1";
-  file: Blob;
-  language?: string;
-  prompt?: string;
-  response_format?: "json" | "text" | "srt" | "verbose_json" | "vtt";
-  temperature?: number;
-  onController?: (controller: AbortController) => void;
-}
-
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -109,7 +99,6 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
   abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
-  abstract transcription(options: TranscriptionOptions): Promise<string>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }

+ 0 - 4
app/client/platforms/alibaba.ts

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -88,9 +87,6 @@ export class QwenApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 0 - 4
app/client/platforms/anthropic.ts

@@ -5,7 +5,6 @@ import {
   LLMApi,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import {
   useAccessStore,
@@ -90,9 +89,6 @@ export class ClaudeApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

+ 0 - 4
app/client/platforms/baidu.ts

@@ -15,7 +15,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -80,9 +79,6 @@ export class ErnieApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 0 - 4
app/client/platforms/bytedance.ts

@@ -14,7 +14,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -82,9 +81,6 @@ export class DoubaoApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({

+ 1 - 4
app/client/platforms/google.ts

@@ -6,7 +6,6 @@ import {
   LLMModel,
   LLMUsage,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
@@ -67,9 +66,7 @@ export class GeminiProApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
+
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;

+ 0 - 4
app/client/platforms/iflytek.ts

@@ -13,7 +13,6 @@ import {
   LLMApi,
   LLMModel,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -63,9 +62,6 @@ export class SparkApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

+ 0 - 4
app/client/platforms/moonshot.ts

@@ -27,7 +27,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,9 +76,6 @@ export class MoonshotApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];

+ 0 - 42
app/client/platforms/openai.ts

@@ -34,7 +34,6 @@ import {
   LLMUsage,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -187,47 +186,6 @@ export class ChatGPTApi implements LLMApi {
     }
   }
 
-  async transcription(options: TranscriptionOptions): Promise<string> {
-    const formData = new FormData();
-    formData.append("file", options.file, "audio.wav");
-    formData.append("model", options.model ?? "whisper-1");
-    if (options.language) formData.append("language", options.language);
-    if (options.prompt) formData.append("prompt", options.prompt);
-    if (options.response_format)
-      formData.append("response_format", options.response_format);
-    if (options.temperature)
-      formData.append("temperature", options.temperature.toString());
-
-    console.log("[Request] openai audio transcriptions payload: ", options);
-
-    const controller = new AbortController();
-    options.onController?.(controller);
-
-    try {
-      const path = this.path(OpenaiPath.TranscriptionPath, options.model);
-      const headers = getHeaders(true);
-      const payload = {
-        method: "POST",
-        body: formData,
-        signal: controller.signal,
-        headers: headers,
-      };
-
-      // make a fetch request
-      const requestTimeoutId = setTimeout(
-        () => controller.abort(),
-        REQUEST_TIMEOUT_MS,
-      );
-      const res = await fetch(path, payload);
-      clearTimeout(requestTimeoutId);
-      const json = await res.json();
-      return json.text;
-    } catch (e) {
-      console.log("[Request] failed to make a audio transcriptions request", e);
-      throw e;
-    }
-  }
-
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,

+ 0 - 4
app/client/platforms/tencent.ts

@@ -9,7 +9,6 @@ import {
   LLMModel,
   MultimodalContent,
   SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -94,9 +93,6 @@ export class HunyuanApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
 
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);

+ 1 - 57
app/components/chat.tsx

@@ -10,7 +10,6 @@ import React, {
 } from "react";
 
 import SendWhiteIcon from "../icons/send-white.svg";
-import VoiceWhiteIcon from "../icons/voice-white.svg";
 import BrainIcon from "../icons/brain.svg";
 import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
@@ -83,7 +82,7 @@ import dynamic from "next/dynamic";
 import { ChatControllerPool } from "../client/controller";
 import { DalleSize, DalleQuality, DalleStyle } from "../typing";
 import { Prompt, usePromptStore } from "../store/prompt";
-import Locale, { getLang, getSTTLang } from "../locales";
+import Locale from "../locales";
 
 import { IconButton } from "./button";
 import styles from "./chat.module.scss";
@@ -100,9 +99,7 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
-  DEFAULT_STT_ENGINE,
   DEFAULT_TTS_ENGINE,
-  FIREFOX_DEFAULT_STT_ENGINE,
   ModelProvider,
   LAST_INPUT_KEY,
   Path,
@@ -123,11 +120,6 @@ import { MultimodalContent } from "../client/api";
 const localStorage = safeLocalStorage();
 import { ClientApi } from "../client/api";
 import { createTTSPlayer } from "../utils/audio";
-import {
-  OpenAITranscriptionApi,
-  SpeechApi,
-  WebTranscriptionApi,
-} from "../utils/speech";
 import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
 
 const ttsPlayer = createTTSPlayer();
@@ -556,44 +548,6 @@ export function ChatActions(props: {
     }
   }, [chatStore, currentModel, models]);
 
-  const [isListening, setIsListening] = useState(false);
-  const [isTranscription, setIsTranscription] = useState(false);
-  const [speechApi, setSpeechApi] = useState<any>(null);
-
-  useEffect(() => {
-    if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE;
-    setSpeechApi(
-      config.sttConfig.engine === DEFAULT_STT_ENGINE
-        ? new WebTranscriptionApi((transcription) =>
-            onRecognitionEnd(transcription),
-          )
-        : new OpenAITranscriptionApi((transcription) =>
-            onRecognitionEnd(transcription),
-          ),
-    );
-  }, []);
-
-  const startListening = async () => {
-    if (speechApi) {
-      await speechApi.start();
-      setIsListening(true);
-    }
-  };
-  const stopListening = async () => {
-    if (speechApi) {
-      if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
-        setIsTranscription(true);
-      await speechApi.stop();
-      setIsListening(false);
-    }
-  };
-  const onRecognitionEnd = (finalTranscript: string) => {
-    console.log(finalTranscript);
-    if (finalTranscript) props.setUserInput(finalTranscript);
-    if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
-      setIsTranscription(false);
-  };
-
   return (
     <div className={styles["chat-input-actions"]}>
       {couldStop && (
@@ -828,16 +782,6 @@ export function ChatActions(props: {
           icon={<ShortcutkeyIcon />}
         />
       )}
-
-      {config.sttConfig.enable && (
-        <ChatAction
-          onClick={async () =>
-            isListening ? await stopListening() : await startListening()
-          }
-          text={isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak}
-          icon={<VoiceWhiteIcon />}
-        />
-      )}
     </div>
   );
 }

+ 0 - 12
app/components/settings.tsx

@@ -81,7 +81,6 @@ import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
 import { TTSConfigList } from "./tts-config";
-import { STTConfigList } from "./stt-config";
 
 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();
@@ -1659,17 +1658,6 @@ export function Settings() {
           />
         </List>
 
-        <List>
-          <STTConfigList
-            sttConfig={config.sttConfig}
-            updateConfig={(updater) => {
-              const sttConfig = { ...config.sttConfig };
-              updater(sttConfig);
-              config.update((config) => (config.sttConfig = sttConfig));
-            }}
-          />
-        </List>
-
         <DangerItems />
       </div>
     </ErrorBoundary>

+ 0 - 51
app/components/stt-config.tsx

@@ -1,51 +0,0 @@
-import { STTConfig, STTConfigValidator } from "../store";
-
-import Locale from "../locales";
-import { ListItem, Select } from "./ui-lib";
-import { DEFAULT_STT_ENGINES } from "../constant";
-import { isFirefox } from "../utils";
-
-export function STTConfigList(props: {
-  sttConfig: STTConfig;
-  updateConfig: (updater: (config: STTConfig) => void) => void;
-}) {
-  return (
-    <>
-      <ListItem
-        title={Locale.Settings.STT.Enable.Title}
-        subTitle={Locale.Settings.STT.Enable.SubTitle}
-      >
-        <input
-          type="checkbox"
-          checked={props.sttConfig.enable}
-          onChange={(e) =>
-            props.updateConfig(
-              (config) => (config.enable = e.currentTarget.checked),
-            )
-          }
-        ></input>
-      </ListItem>
-      {!isFirefox() && (
-        <ListItem title={Locale.Settings.STT.Engine.Title}>
-          <Select
-            value={props.sttConfig.engine}
-            onChange={(e) => {
-              props.updateConfig(
-                (config) =>
-                  (config.engine = STTConfigValidator.engine(
-                    e.currentTarget.value,
-                  )),
-              );
-            }}
-          >
-            {DEFAULT_STT_ENGINES.map((v, i) => (
-              <option value={v} key={i}>
-                {v}
-              </option>
-            ))}
-          </Select>
-        </ListItem>
-      )}
-    </>
-  );
-}

+ 0 - 119
app/components/stt.module.scss

@@ -1,119 +0,0 @@
-@import "../styles/animation.scss";
-.plugin-page {
-  height: 100%;
-  display: flex;
-  flex-direction: column;
-
-  .plugin-page-body {
-    padding: 20px;
-    overflow-y: auto;
-
-    .plugin-filter {
-      width: 100%;
-      max-width: 100%;
-      margin-bottom: 20px;
-      animation: slide-in ease 0.3s;
-      height: 40px;
-
-      display: flex;
-
-      .search-bar {
-        flex-grow: 1;
-        max-width: 100%;
-        min-width: 0;
-        outline: none;
-      }
-
-      .search-bar:focus {
-        border: 1px solid var(--primary);
-      }
-
-      .plugin-filter-lang {
-        height: 100%;
-        margin-left: 10px;
-      }
-
-      .plugin-create {
-        height: 100%;
-        margin-left: 10px;
-        box-sizing: border-box;
-        min-width: 80px;
-      }
-    }
-
-    .plugin-item {
-      display: flex;
-      justify-content: space-between;
-      padding: 20px;
-      border: var(--border-in-light);
-      animation: slide-in ease 0.3s;
-
-      &:not(:last-child) {
-        border-bottom: 0;
-      }
-
-      &:first-child {
-        border-top-left-radius: 10px;
-        border-top-right-radius: 10px;
-      }
-
-      &:last-child {
-        border-bottom-left-radius: 10px;
-        border-bottom-right-radius: 10px;
-      }
-
-      .plugin-header {
-        display: flex;
-        align-items: center;
-
-        .plugin-icon {
-          display: flex;
-          align-items: center;
-          justify-content: center;
-          margin-right: 10px;
-        }
-
-        .plugin-title {
-          .plugin-name {
-            font-size: 14px;
-            font-weight: bold;
-          }
-          .plugin-info {
-            font-size: 12px;
-          }
-          .plugin-runtime-warning {
-            font-size: 12px;
-            color: #f86c6c;
-          }
-        }
-      }
-
-      .plugin-actions {
-        display: flex;
-        flex-wrap: nowrap;
-        transition: all ease 0.3s;
-        justify-content: center;
-        align-items: center;
-      }
-
-      @media screen and (max-width: 600px) {
-        display: flex;
-        flex-direction: column;
-        padding-bottom: 10px;
-        border-radius: 10px;
-        margin-bottom: 20px;
-        box-shadow: var(--card-shadow);
-
-        &:not(:last-child) {
-          border-bottom: var(--border-in-light);
-        }
-
-        .plugin-actions {
-          width: 100%;
-          justify-content: space-between;
-          padding-top: 10px;
-        }
-      }
-    }
-  }
-}

+ 0 - 5
app/constant.ts

@@ -153,7 +153,6 @@ export const Anthropic = {
 export const OpenaiPath = {
   ChatPath: "v1/chat/completions",
   SpeechPath: "v1/audio/speech",
-  TranscriptionPath: "v1/audio/transcriptions",
   ImagePath: "v1/images/generations",
   UsagePath: "dashboard/billing/usage",
   SubsPath: "dashboard/billing/subscription",
@@ -274,10 +273,6 @@ export const DEFAULT_TTS_VOICES = [
   "shimmer",
 ];
 
-export const DEFAULT_STT_ENGINE = "WebAPI";
-export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"];
-export const FIREFOX_DEFAULT_STT_ENGINE = "OpenAI Whisper";
-
 const openaiModels = [
   "gpt-3.5-turbo",
   "gpt-3.5-turbo-1106",

+ 0 - 10
app/locales/cn.ts

@@ -520,16 +520,6 @@ const cn = {
         SubTitle: "生成语音的速度",
       },
     },
-    STT: {
-      Enable: {
-        Title: "启用语音转文本",
-        SubTitle: "启用语音转文本",
-      },
-      Engine: {
-        Title: "转换引擎",
-        SubTitle: "音频转换引擎",
-      },
-    },
   },
   Store: {
     DefaultTopic: "新的聊天",

+ 0 - 10
app/locales/en.ts

@@ -527,16 +527,6 @@ const en: LocaleType = {
       },
       Engine: "TTS Engine",
     },
-    STT: {
-      Enable: {
-        Title: "Enable STT",
-        SubTitle: "Enable Speech-to-Text",
-      },
-      Engine: {
-        Title: "STT Engine",
-        SubTitle: "Text-to-Speech Engine",
-      },
-    },
   },
   Store: {
     DefaultTopic: "New Conversation",

+ 0 - 15
app/store/config.ts

@@ -5,8 +5,6 @@ import {
   DEFAULT_INPUT_TEMPLATE,
   DEFAULT_MODELS,
   DEFAULT_SIDEBAR_WIDTH,
-  DEFAULT_STT_ENGINE,
-  DEFAULT_STT_ENGINES,
   DEFAULT_TTS_ENGINE,
   DEFAULT_TTS_ENGINES,
   DEFAULT_TTS_MODEL,
@@ -23,8 +21,6 @@ export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
 export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
 export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];
 
-export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number];
-
 export enum SubmitKey {
   Enter = "Enter",
   CtrlEnter = "Ctrl + Enter",
@@ -90,17 +86,12 @@ export const DEFAULT_CONFIG = {
     voice: DEFAULT_TTS_VOICE,
     speed: 1.0,
   },
-  sttConfig: {
-    enable: false,
-    engine: DEFAULT_STT_ENGINE,
-  },
 };
 
 export type ChatConfig = typeof DEFAULT_CONFIG;
 
 export type ModelConfig = ChatConfig["modelConfig"];
 export type TTSConfig = ChatConfig["ttsConfig"];
-export type STTConfig = ChatConfig["sttConfig"];
 
 export function limitNumber(
   x: number,
@@ -130,12 +121,6 @@ export const TTSConfigValidator = {
   },
 };
 
-export const STTConfigValidator = {
-  engine(x: string) {
-    return x as STTEngineType;
-  },
-};
-
 export const ModalConfigValidator = {
   model(x: string) {
     return x as ModelType;

+ 0 - 126
app/utils/speech.ts

@@ -1,126 +0,0 @@
-import { ChatGPTApi } from "../client/platforms/openai";
-import { getSTTLang } from "../locales";
-import { isFirefox } from "../utils";
-
-export type TranscriptionCallback = (transcription: string) => void;
-
-export abstract class SpeechApi {
-  protected onTranscription: TranscriptionCallback = () => {};
-
-  abstract isListening(): boolean;
-  abstract start(): Promise<void>;
-  abstract stop(): Promise<void>;
-
-  onTranscriptionReceived(callback: TranscriptionCallback) {
-    this.onTranscription = callback;
-  }
-}
-
-export class OpenAITranscriptionApi extends SpeechApi {
-  private listeningStatus = false;
-  private mediaRecorder: MediaRecorder | null = null;
-  private stream: MediaStream | null = null;
-  private audioChunks: Blob[] = [];
-
-  isListening = () => this.listeningStatus;
-
-  constructor(transcriptionCallback?: TranscriptionCallback) {
-    super();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
-  }
-
-  async start(): Promise<void> {
-    // @ts-ignore
-    navigator.getUserMedia =
-      // @ts-ignore
-      navigator.getUserMedia ||
-      // @ts-ignore
-      navigator.webkitGetUserMedia ||
-      // @ts-ignore
-      navigator.mozGetUserMedia ||
-      // @ts-ignore
-      navigator.msGetUserMedia;
-    if (navigator.mediaDevices) {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      this.mediaRecorder = new MediaRecorder(stream);
-      this.mediaRecorder.ondataavailable = (e) => {
-        if (e.data && e.data.size > 0) {
-          this.audioChunks.push(e.data);
-        }
-      };
-
-      this.stream = stream;
-    } else {
-      console.warn("Media Decives will work only with SSL");
-      return;
-    }
-
-    this.audioChunks = [];
-
-    // this.recorder.addEventListener("dataavailable", (event) => {
-    //     this.audioChunks.push(event.data);
-    // });
-
-    this.mediaRecorder.start(1000);
-    this.listeningStatus = true;
-  }
-
-  async stop(): Promise<void> {
-    if (!this.mediaRecorder || !this.listeningStatus) {
-      return;
-    }
-
-    return new Promise((resolve) => {
-      this.mediaRecorder!.addEventListener("stop", async () => {
-        const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" });
-        const llm = new ChatGPTApi();
-        const transcription = await llm.transcription({ file: audioBlob });
-        this.onTranscription(transcription);
-        this.listeningStatus = false;
-        resolve();
-      });
-
-      this.mediaRecorder!.stop();
-    });
-  }
-}
-
-export class WebTranscriptionApi extends SpeechApi {
-  private listeningStatus = false;
-  private recognitionInstance: any | null = null;
-
-  isListening = () => this.listeningStatus;
-
-  constructor(transcriptionCallback?: TranscriptionCallback) {
-    super();
-    if (isFirefox()) return;
-    const SpeechRecognition =
-      (window as any).SpeechRecognition ||
-      (window as any).webkitSpeechRecognition;
-    this.recognitionInstance = new SpeechRecognition();
-    this.recognitionInstance.continuous = true;
-    this.recognitionInstance.interimResults = true;
-    this.recognitionInstance.lang = getSTTLang();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
-    this.recognitionInstance.onresult = (event: any) => {
-      const result = event.results[event.results.length - 1];
-      if (result.isFinal) {
-        this.onTranscription(result[0].transcript);
-      }
-    };
-  }
-
-  async start(): Promise<void> {
-    this.listeningStatus = true;
-    await this.recognitionInstance.start();
-  }
-
-  async stop(): Promise<void> {
-    this.listeningStatus = false;
-    await this.recognitionInstance.stop();
-  }
-}