1 éve · 1d15666713
--- a/README.md
+++ b/README.md
@@ -355,6 +355,13 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
 
				 
			
 
				 Change default model
			
 
				 
			
 
				+### `VISION_MODELS` (optional)
			
 
				+
			
 
				+> Default: Empty
			
 
				+> Example: `gpt-4-vision,claude-3-opus,my-custom-model` means add vision capabilities to these models in addition to the default pattern matches (which detect models containing keywords like "vision", "claude-3", "gemini-1.5", etc).
			
 
				+
			
 
				+Add additional models to have vision capabilities, beyond the default pattern matching. Multiple models should be separated by commas.
			
 
				+
			
 
				 ### `WHITE_WEBDAV_ENDPOINTS` (optional)
			
 
				 
			
 
				 You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format：
			
--- a/README_CN.md
+++ b/README_CN.md
@@ -235,6 +235,13 @@ ChatGLM Api Url.
 
				 
			
 
				 更改默认模型
			
 
				 
			
 
				+### `VISION_MODELS` (可选)
			
 
				+
			
 
				+> 默认值：空
			
 
				+> 示例：`gpt-4-vision,claude-3-opus,my-custom-model` 表示为这些模型添加视觉能力，作为对默认模式匹配的补充（默认会检测包含"vision"、"claude-3"、"gemini-1.5"等关键词的模型）。
			
 
				+
			
 
				+在默认模式匹配之外，添加更多具有视觉能力的模型。多个模型用逗号分隔。
			
 
				+
			
 
				 ### `DEFAULT_INPUT_TEMPLATE` （可选）
			
 
				 
			
 
				 自定义默认的 template，用于初始化『设置』中的『用户输入预处理』配置项
			
--- a/README_JA.md
+++ b/README_JA.md
@@ -217,6 +217,13 @@ ByteDance モードでは、`modelName@bytedance=deploymentName` 形式でモデ
 
				 
			
 
				 デフォルトのモデルを変更します。
			
 
				 
			
 
				+### `VISION_MODELS` (オプション)
			
 
				+
			
 
				+> デフォルト：空
			
 
				+> 例：`gpt-4-vision,claude-3-opus,my-custom-model` は、これらのモデルにビジョン機能を追加します。これはデフォルトのパターンマッチング（"vision"、"claude-3"、"gemini-1.5"などのキーワードを含むモデルを検出）に加えて適用されます。
			
 
				+
			
 
				+デフォルトのパターンマッチングに加えて、追加のモデルにビジョン機能を付与します。複数のモデルはカンマで区切ります。
			
 
				+
			
 
				 ### `DEFAULT_INPUT_TEMPLATE` （オプション）
			
 
				 
			
 
				 『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。
			
--- a/app/config/build.ts
+++ b/app/config/build.ts
@@ -40,6 +40,7 @@ export const getBuildConfig = () => {
 
				     buildMode,
			
 
				     isApp,
			
 
				     template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE,
			
 
				+    visionModels: process.env.VISION_MODELS || "",
			
 
				   };
			
 
				 };
			
 
				 
			
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -291,6 +291,22 @@ export const DEFAULT_TTS_VOICES = [
 
				   "shimmer",
			
 
				 ];
			
 
				 
			
 
				+export const VISION_MODEL_REGEXES = [
			
 
				+  /vision/,
			
 
				+  /gpt-4o/,
			
 
				+  /claude-3/,
			
 
				+  /gemini-1\.5/,
			
 
				+  /gemini-exp/,
			
 
				+  /gemini-2\.0/,
			
 
				+  /learnlm/,
			
 
				+  /qwen-vl/,
			
 
				+  /qwen2-vl/,
			
 
				+  /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
			
 
				+  /^dall-e-3$/, // Matches exactly "dall-e-3"
			
 
				+];
			
 
				+
			
 
				+export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
			
 
				+
			
 
				 const openaiModels = [
			
 
				   "gpt-3.5-turbo",
			
 
				   "gpt-3.5-turbo-1106",
			
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -5,6 +5,8 @@ import { RequestMessage } from "./client/api";
 
				 import { ServiceProvider } from "./constant";
			
 
				 // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
			
 
				 import { fetch as tauriStreamFetch } from "./utils/stream";
			
 
				+import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
			
 
				+import { getClientConfig } from "./config/client";
			
 
				 
			
 
				 export function trimTopic(topic: string) {
			
 
				   // Fix an issue where double quotes still show in the Indonesian language
			
@@ -252,28 +254,16 @@ export function getMessageImages(message: RequestMessage): string[] {
 
				 }
			
 
				 
			
 
				 export function isVisionModel(model: string) {
			
 
				-  // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using)
			
 
				-
			
 
				-  const excludeKeywords = ["claude-3-5-haiku-20241022"];
			
 
				-  const visionKeywords = [
			
 
				-    "vision",
			
 
				-    "gpt-4o",
			
 
				-    "claude-3",
			
 
				-    "gemini-1.5",
			
 
				-    "gemini-exp",
			
 
				-    "gemini-2.0",
			
 
				-    "learnlm",
			
 
				-    "qwen-vl",
			
 
				-    "qwen2-vl",
			
 
				-  ];
			
 
				-  const isGpt4Turbo =
			
 
				-    model.includes("gpt-4-turbo") && !model.includes("preview");
			
 
				-
			
 
				+  const clientConfig = getClientConfig();
			
 
				+  const envVisionModels = clientConfig?.visionModels
			
 
				+    ?.split(",")
			
 
				+    .map((m) => m.trim());
			
 
				+  if (envVisionModels?.includes(model)) {
			
 
				+    return true;
			
 
				+  }
			
 
				   return (
			
 
				-    !excludeKeywords.some((keyword) => model.includes(keyword)) &&
			
 
				-    (visionKeywords.some((keyword) => model.includes(keyword)) ||
			
 
				-      isGpt4Turbo ||
			
 
				-      isDalle3(model))
			
 
				+    !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) &&
			
 
				+    VISION_MODEL_REGEXES.some((regex) => regex.test(model))
			
 
				   );
			
 
				 }
			
 
				 
			
--- a/test/vision-model-checker.test.ts
+++ b/test/vision-model-checker.test.ts
@@ -0,0 +1,67 @@
 
				+import { isVisionModel } from "../app/utils";
			
 
				+
			
 
				+describe("isVisionModel", () => {
			
 
				+  const originalEnv = process.env;
			
 
				+
			
 
				+  beforeEach(() => {
			
 
				+    jest.resetModules();
			
 
				+    process.env = { ...originalEnv };
			
 
				+  });
			
 
				+
			
 
				+  afterEach(() => {
			
 
				+    process.env = originalEnv;
			
 
				+  });
			
 
				+
			
 
				+  test("should identify vision models using regex patterns", () => {
			
 
				+    const visionModels = [
			
 
				+      "gpt-4-vision",
			
 
				+      "claude-3-opus",
			
 
				+      "gemini-1.5-pro",
			
 
				+      "gemini-2.0",
			
 
				+      "gemini-exp-vision",
			
 
				+      "learnlm-vision",
			
 
				+      "qwen-vl-max",
			
 
				+      "qwen2-vl-max",
			
 
				+      "gpt-4-turbo",
			
 
				+      "dall-e-3",
			
 
				+    ];
			
 
				+
			
 
				+    visionModels.forEach((model) => {
			
 
				+      expect(isVisionModel(model)).toBe(true);
			
 
				+    });
			
 
				+  });
			
 
				+
			
 
				+  test("should exclude specific models", () => {
			
 
				+    expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  test("should not identify non-vision models", () => {
			
 
				+    const nonVisionModels = [
			
 
				+      "gpt-3.5-turbo",
			
 
				+      "gpt-4-turbo-preview",
			
 
				+      "claude-2",
			
 
				+      "regular-model",
			
 
				+    ];
			
 
				+
			
 
				+    nonVisionModels.forEach((model) => {
			
 
				+      expect(isVisionModel(model)).toBe(false);
			
 
				+    });
			
 
				+  });
			
 
				+
			
 
				+  test("should identify models from VISION_MODELS env var", () => {
			
 
				+    process.env.VISION_MODELS = "custom-vision-model,another-vision-model";
			
 
				+    
			
 
				+    expect(isVisionModel("custom-vision-model")).toBe(true);
			
 
				+    expect(isVisionModel("another-vision-model")).toBe(true);
			
 
				+    expect(isVisionModel("unrelated-model")).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  test("should handle empty or missing VISION_MODELS", () => {
			
 
				+    process.env.VISION_MODELS = "";
			
 
				+    expect(isVisionModel("unrelated-model")).toBe(false);
			
 
				+
			
 
				+    delete process.env.VISION_MODELS;
			
 
				+    expect(isVisionModel("unrelated-model")).toBe(false);
			
 
				+    expect(isVisionModel("gpt-4-vision")).toBe(true);
			
 
				+  });
			
 
				+});