Merge pull request #5919 from Yiming3/feature/flexible-visual-model

feat: runtime configuration of vision-capable models
This commit is contained in:
Dogtiti 2024-12-22 10:37:57 +08:00 committed by GitHub
commit 1d15666713
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 116 additions and 21 deletions

View File

@ -355,6 +355,13 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
Change default model Change default model
### `VISION_MODELS` (optional)
> Default: Empty
> Example: `gpt-4-vision,claude-3-opus,my-custom-model` means add vision capabilities to these models in addition to the default pattern matches (which detect models containing keywords like "vision", "claude-3", "gemini-1.5", etc).
Add additional models to have vision capabilities, beyond the default pattern matching. Multiple models should be separated by commas.
### `WHITE_WEBDAV_ENDPOINTS` (optional) ### `WHITE_WEBDAV_ENDPOINTS` (optional)
You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format

View File

@ -235,6 +235,13 @@ ChatGLM Api Url.
更改默认模型 更改默认模型
### `VISION_MODELS` (可选)
> 默认值:空
> 示例:`gpt-4-vision,claude-3-opus,my-custom-model` 表示为这些模型添加视觉能力,作为对默认模式匹配的补充(默认会检测包含"vision"、"claude-3"、"gemini-1.5"等关键词的模型)。
在默认模式匹配之外,添加更多具有视觉能力的模型。多个模型用逗号分隔。
### `DEFAULT_INPUT_TEMPLATE` (可选) ### `DEFAULT_INPUT_TEMPLATE` (可选)
自定义默认的 template用于初始化『设置』中的『用户输入预处理』配置项 自定义默认的 template用于初始化『设置』中的『用户输入预处理』配置项

View File

@ -217,6 +217,13 @@ ByteDance モードでは、`modelName@bytedance=deploymentName` 形式でモデ
デフォルトのモデルを変更します。 デフォルトのモデルを変更します。
### `VISION_MODELS` (オプション)
> デフォルト:空
> 例:`gpt-4-vision,claude-3-opus,my-custom-model` は、これらのモデルにビジョン機能を追加します。これはデフォルトのパターンマッチング("vision"、"claude-3"、"gemini-1.5"などのキーワードを含むモデルを検出)に加えて適用されます。
デフォルトのパターンマッチングに加えて、追加のモデルにビジョン機能を付与します。複数のモデルはカンマで区切ります。
### `DEFAULT_INPUT_TEMPLATE` (オプション) ### `DEFAULT_INPUT_TEMPLATE` (オプション)
『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。 『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。

View File

@ -40,6 +40,7 @@ export const getBuildConfig = () => {
buildMode, buildMode,
isApp, isApp,
template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE, template: process.env.DEFAULT_INPUT_TEMPLATE ?? DEFAULT_INPUT_TEMPLATE,
visionModels: process.env.VISION_MODELS || "",
}; };
}; };

View File

@ -291,6 +291,22 @@ export const DEFAULT_TTS_VOICES = [
"shimmer", "shimmer",
]; ];
export const VISION_MODEL_REGEXES = [
/vision/,
/gpt-4o/,
/claude-3/,
/gemini-1\.5/,
/gemini-exp/,
/gemini-2\.0/,
/learnlm/,
/qwen-vl/,
/qwen2-vl/,
/gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
/^dall-e-3$/, // Matches exactly "dall-e-3"
];
export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
const openaiModels = [ const openaiModels = [
"gpt-3.5-turbo", "gpt-3.5-turbo",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-1106",

View File

@ -5,6 +5,8 @@ import { RequestMessage } from "./client/api";
import { ServiceProvider } from "./constant"; import { ServiceProvider } from "./constant";
// import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http"; // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
import { fetch as tauriStreamFetch } from "./utils/stream"; import { fetch as tauriStreamFetch } from "./utils/stream";
import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant";
import { getClientConfig } from "./config/client";
export function trimTopic(topic: string) { export function trimTopic(topic: string) {
// Fix an issue where double quotes still show in the Indonesian language // Fix an issue where double quotes still show in the Indonesian language
@ -252,28 +254,16 @@ export function getMessageImages(message: RequestMessage): string[] {
} }
export function isVisionModel(model: string) { export function isVisionModel(model: string) {
// Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using) const clientConfig = getClientConfig();
const envVisionModels = clientConfig?.visionModels
const excludeKeywords = ["claude-3-5-haiku-20241022"]; ?.split(",")
const visionKeywords = [ .map((m) => m.trim());
"vision", if (envVisionModels?.includes(model)) {
"gpt-4o", return true;
"claude-3", }
"gemini-1.5",
"gemini-exp",
"gemini-2.0",
"learnlm",
"qwen-vl",
"qwen2-vl",
];
const isGpt4Turbo =
model.includes("gpt-4-turbo") && !model.includes("preview");
return ( return (
!excludeKeywords.some((keyword) => model.includes(keyword)) && !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) &&
(visionKeywords.some((keyword) => model.includes(keyword)) || VISION_MODEL_REGEXES.some((regex) => regex.test(model))
isGpt4Turbo ||
isDalle3(model))
); );
} }

View File

@ -0,0 +1,67 @@
import { isVisionModel } from "../app/utils";
describe("isVisionModel", () => {
const originalEnv = process.env;
beforeEach(() => {
jest.resetModules();
process.env = { ...originalEnv };
});
afterEach(() => {
process.env = originalEnv;
});
test("should identify vision models using regex patterns", () => {
const visionModels = [
"gpt-4-vision",
"claude-3-opus",
"gemini-1.5-pro",
"gemini-2.0",
"gemini-exp-vision",
"learnlm-vision",
"qwen-vl-max",
"qwen2-vl-max",
"gpt-4-turbo",
"dall-e-3",
];
visionModels.forEach((model) => {
expect(isVisionModel(model)).toBe(true);
});
});
test("should exclude specific models", () => {
expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false);
});
test("should not identify non-vision models", () => {
const nonVisionModels = [
"gpt-3.5-turbo",
"gpt-4-turbo-preview",
"claude-2",
"regular-model",
];
nonVisionModels.forEach((model) => {
expect(isVisionModel(model)).toBe(false);
});
});
test("should identify models from VISION_MODELS env var", () => {
process.env.VISION_MODELS = "custom-vision-model,another-vision-model";
expect(isVisionModel("custom-vision-model")).toBe(true);
expect(isVisionModel("another-vision-model")).toBe(true);
expect(isVisionModel("unrelated-model")).toBe(false);
});
test("should handle empty or missing VISION_MODELS", () => {
process.env.VISION_MODELS = "";
expect(isVisionModel("unrelated-model")).toBe(false);
delete process.env.VISION_MODELS;
expect(isVisionModel("unrelated-model")).toBe(false);
expect(isVisionModel("gpt-4-vision")).toBe(true);
});
});