From 5d4e3933024534b36925d1422e84552a7f871d9f Mon Sep 17 00:00:00 2001
From: Davidlasky
Date: Thu, 17 Apr 2025 20:03:31 -0500
Subject: [PATCH] add o4-mini-high and remove all unused models

---
 app/client/platforms/openai.ts    |  25 ++---
 app/constant.ts                   | 147 ++++++------------------------
 app/utils.ts                      |   1 +
 test/vision-model-checker.test.ts |  12 +--
 4 files changed, 43 insertions(+), 142 deletions(-)

diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 72e882467..8d025faba 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -200,7 +200,8 @@ export class ChatGPTApi implements LLMApi {
     const isDalle3 = _isDalle3(options.config.model);
     const isO1 = options.config.model.startsWith("o1");
     const isO3 = options.config.model.startsWith("o3");
-    const isO1OrO3 = isO1 || isO3;
+    const isO4 = options.config.model.startsWith("o4");
+    const isO1OrO3OrO4 = isO1 || isO3 || isO4;
     if (isDalle3) {
       const prompt = getMessageTextContent(
         options.messages.slice(-1)?.pop() as any,
       );
@@ -222,7 +223,7 @@ export class ChatGPTApi implements LLMApi {
       const content = visionModel
         ? await preProcessImageContent(v.content)
         : getMessageTextContent(v);
-      if (!(isO1OrO3 && v.role === "system"))
+      if (!(isO1OrO3OrO4 && v.role === "system"))
         messages.push({ role: v.role, content });
     }

@@ -231,28 +232,28 @@ export class ChatGPTApi implements LLMApi {
       messages,
       stream: options.config.stream,
       model: modelConfig.model,
-      temperature: !isO1OrO3 ? modelConfig.temperature : 1,
-      presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
-      frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
-      top_p: !isO1OrO3 ? modelConfig.top_p : 1,
+      temperature: !isO1OrO3OrO4 ? modelConfig.temperature : 1,
+      presence_penalty: !isO1OrO3OrO4 ? modelConfig.presence_penalty : 0,
+      frequency_penalty: !isO1OrO3OrO4 ? modelConfig.frequency_penalty : 0,
+      top_p: !isO1OrO3OrO4 ? modelConfig.top_p : 1,
       // max_tokens: Math.max(modelConfig.max_tokens, 1024),
       // Please do not ask me why we don't send max_tokens; no reason, this param is just shit, I don't want to explain anymore.
     };

      // o1 uses max_completion_tokens to control the token budget
      // (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
-      if (isO1OrO3) {
-        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+      if (isO1OrO3OrO4) {
+        requestPayload["max_completion_tokens"] = 20000;
       }

-      if (isO3) {
+      if (isO4) {
         requestPayload["reasoning_effort"] = "high";
-        // make o3-mini defaults to high reasoning effort
+        // make o4-mini default to high reasoning effort
       }

       // add max_tokens to vision model
       if (visionModel) {
-        if (isO1) {
-          requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+        if (isO1OrO3OrO4) {
+          requestPayload["max_completion_tokens"] = 20000;
         } else {
           requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
         }

diff --git a/app/constant.ts b/app/constant.ts
index ca482504a..f9153e159 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -422,6 +422,7 @@ export const KnowledgeCutOffDate: Record<string, string> = {
   "claude-3-5-haiku-latest": "2024-10",
   "gpt-4.1": "2024-06",
   "gpt-4.1-mini": "2024-06",
+  "o4-mini": "2024-06",
   "deepseek-chat": "2024-07",
   "deepseek-coder": "2024-07",
 };
@@ -442,141 +443,45 @@ export const DEFAULT_TTS_VOICES = [
 export const VISION_MODEL_REGEXES = [
   /vision/,
-  /gpt-4\.1/,
+  /gpt-4/,
   /claude-3/,
-  /gemini-1\.5/,
-  /gemini-exp/,
-  /gemini-2\.0/,
-  /gemini-2\.5-pro/,
-  /learnlm/,
-  /qwen-vl/,
-  /qwen2-vl/,
-  /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview"
+  /gemini/,
   /^dall-e-3$/, // Matches exactly "dall-e-3"
-  /glm-4v/,
-  /vl/i,
   /o1/,
+  /o3/,
+  /o4/,
 ];

 export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];

-const openaiModels = ["dall-e-3", "o1", "o3-mini", "gpt-4.1", "gpt-4.1-mini"];
+const openaiModels = ["dall-e-3", "o1", "o4-mini", "gpt-4.1", "gpt-4.1-mini"];

-const googleModels = [
-  "gemini-2.0-flash",
-  "gemini-2.0-flash-lite",
-  "gemini-2.5-pro-exp-03-25",
-];
+const googleModels = ["gemini-2.0-flash", "gemini-2.5-pro-exp-03-25"];

 const anthropicModels = [
-  "claude-3-opus-latest",
-  "claude-3-5-haiku-latest",
   "claude-3-5-sonnet-latest",
   "claude-3-7-sonnet-latest",
 ];

-const baiduModels = [
-  "ernie-4.0-turbo-8k",
-  "ernie-4.0-8k",
-  "ernie-4.0-8k-preview",
-  "ernie-4.0-8k-preview-0518",
-  "ernie-4.0-8k-latest",
-  "ernie-3.5-8k",
-  "ernie-3.5-8k-0205",
-  "ernie-speed-128k",
-  "ernie-speed-8k",
-  "ernie-lite-8k",
-  "ernie-tiny-8k",
-];
+const baiduModels = ["ernie-4.0-turbo-8k"];

-const bytedanceModels = [
-  "Doubao-lite-4k",
-  "Doubao-lite-32k",
-  "Doubao-lite-128k",
-  "Doubao-pro-4k",
-  "Doubao-pro-32k",
-  "Doubao-pro-128k",
-];
+const bytedanceModels = ["Doubao-pro-128k"];

-const alibabaModes = [
-  "qwen-turbo",
-  "qwen-plus",
-  "qwen-max",
-  "qwen-max-0428",
-  "qwen-max-0403",
-  "qwen-max-0107",
-  "qwen-max-longcontext",
-  "qwen-omni-turbo",
-  "qwen-vl-plus",
-  "qwen-vl-max",
-];
+const alibabaModes = ["qwen-vl-max"];

-const tencentModels = [
-  "hunyuan-pro",
-  "hunyuan-standard",
-  "hunyuan-lite",
-  "hunyuan-role",
-  "hunyuan-functioncall",
-  "hunyuan-code",
-  "hunyuan-vision",
-];
+const tencentModels = ["hunyuan-code"];

-const moonshotModes = ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"];
+const moonshotModes = ["moonshot-v1-128k"];

-const iflytekModels = [
-  "general",
-  "generalv3",
-  "pro-128k",
-  "generalv3.5",
-  "4.0Ultra",
-];
+const iflytekModels = ["4.0Ultra"];

 const deepseekModels = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"];

-const xAIModes = [
-  "grok-beta",
-  "grok-2",
-  "grok-2-1212",
-  "grok-2-latest",
-  "grok-vision-beta",
-  "grok-2-vision-1212",
"grok-2-vision", - "grok-2-vision-latest", -]; +const xAIModes = ["grok-2-vision-latest"]; -const chatglmModels = [ - "glm-4-plus", - "glm-4-0520", - "glm-4", - "glm-4-air", - "glm-4-airx", - "glm-4-long", - "glm-4-flashx", - "glm-4-flash", - "glm-4v-plus", - "glm-4v", - "glm-4v-flash", // free - "cogview-3-plus", - "cogview-3", - "cogview-3-flash", // free - // 目前无法适配轮询任务 - // "cogvideox", - // "cogvideox-flash", // free -]; +const chatglmModels = ["glm-4-plus"]; const siliconflowModels = [ - "Qwen/Qwen2.5-7B-Instruct", - "Qwen/Qwen2.5-72B-Instruct", - "deepseek-ai/DeepSeek-R1", - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - "deepseek-ai/DeepSeek-V3", - "meta-llama/Llama-3.3-70B-Instruct", - "THUDM/glm-4-9b-chat", "Pro/deepseek-ai/DeepSeek-R1", "Pro/deepseek-ai/DeepSeek-V3", ]; @@ -596,7 +501,7 @@ export const DEFAULT_MODELS = [ })), ...openaiModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "azure", @@ -629,7 +534,7 @@ export const DEFAULT_MODELS = [ })), ...baiduModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "baidu", @@ -640,7 +545,7 @@ export const DEFAULT_MODELS = [ })), ...bytedanceModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "bytedance", @@ -651,7 +556,7 @@ export const DEFAULT_MODELS = [ })), ...alibabaModes.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "alibaba", @@ -662,7 +567,7 @@ export const DEFAULT_MODELS = [ })), ...tencentModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "tencent", @@ -673,7 +578,7 @@ export const DEFAULT_MODELS = [ })), ...moonshotModes.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "moonshot", @@ -684,7 +589,7 @@ export const DEFAULT_MODELS = [ })), ...iflytekModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "iflytek", @@ -695,7 +600,7 @@ export const DEFAULT_MODELS = [ })), ...xAIModes.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "xai", @@ -706,7 +611,7 @@ export const DEFAULT_MODELS = [ })), ...chatglmModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "chatglm", @@ -717,7 +622,7 @@ export const DEFAULT_MODELS = [ })), ...deepseekModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "deepseek", @@ -728,7 +633,7 @@ export const DEFAULT_MODELS = [ })), ...siliconflowModels.map((name) => ({ name, - available: true, + available: false, sorted: seq++, provider: { id: "siliconflow", diff --git a/app/utils.ts b/app/utils.ts index f2454e5cc..3f81a374e 100644 --- a/app/utils.ts +++ b/app/utils.ts @@ -303,6 +303,7 @@ export function getTimeoutMSByModel(model: string) { model.startsWith("dalle") || model.startsWith("o1") || model.startsWith("o3") || + model.startsWith("o4") || model.includes("deepseek-r") || model.includes("-thinking") || model.includes("pro") diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts index 8caf1cc39..fa255e30b 100644 --- a/test/vision-model-checker.test.ts +++ b/test/vision-model-checker.test.ts 
@@ -17,15 +17,14 @@ describe("isVisionModel", () => {
     const visionModels = [
       "gpt-4.1",
       "claude-3-opus",
-      "gemini-1.5-pro",
       "gemini-2.0",
       "gemini-2.5-pro",
-      "gemini-exp-vision",
       "learnlm-vision",
       "qwen-vl-max",
       "qwen2-vl-max",
-      "gpt-4-turbo",
+      "o4-mini",
       "dall-e-3",
+      "o1",
     ];

     visionModels.forEach((model) => {
@@ -38,12 +37,7 @@
   });

   test("should not identify non-vision models", () => {
-    const nonVisionModels = [
-      "gpt-3.5-turbo",
-      "gpt-4-turbo-preview",
-      "claude-2",
-      "regular-model",
-    ];
+    const nonVisionModels = ["gpt-3.5-turbo", "claude-2", "regular-model"];

     nonVisionModels.forEach((model) => {
       expect(isVisionModel(model)).toBe(false);
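
---

Note (editorial, not part of the patch): the openai.ts hunks above route every
o1/o3/o4 model through a single branch that neutralizes sampling parameters,
swaps max_tokens for max_completion_tokens, and pins o4 to high reasoning
effort. The sketch below distills that logic into a standalone TypeScript
function; the names ModelConfig and buildRequestPayload are illustrative and
do not exist in the repo, and the fixed 20000 mirrors the value the patch
hardcodes.

    interface ModelConfig {
      model: string;
      temperature: number;
      presence_penalty: number;
      frequency_penalty: number;
      top_p: number;
      max_tokens: number;
    }

    function buildRequestPayload(cfg: ModelConfig, stream: boolean) {
      // Reasoning models (o1/o3/o4) ignore sampling parameters and take
      // max_completion_tokens instead of max_tokens.
      const isReasoning = ["o1", "o3", "o4"].some((p) =>
        cfg.model.startsWith(p),
      );

      const payload: Record<string, unknown> = {
        model: cfg.model,
        stream,
        temperature: isReasoning ? 1 : cfg.temperature,
        presence_penalty: isReasoning ? 0 : cfg.presence_penalty,
        frequency_penalty: isReasoning ? 0 : cfg.frequency_penalty,
        top_p: isReasoning ? 1 : cfg.top_p,
      };

      if (isReasoning) {
        // Fixed completion budget, matching the patch's hardcoded 20000.
        payload["max_completion_tokens"] = 20000;
      }
      if (cfg.model.startsWith("o4")) {
        // o4-mini is pinned to high reasoning effort.
        payload["reasoning_effort"] = "high";
      }
      return payload;
    }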
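
Note (editorial): broadening /gpt-4\.1/ to /gpt-4/ is also why the test hunk
drops "gpt-4-turbo-preview" from the non-vision list; the plain /gpt-4/ regex
now matches it. Below is a self-contained approximation of the classification;
the project's actual isVisionModel may consult additional configuration, so
this sketch covers only the regex path shown in the diff, and
isVisionModelSketch is a hypothetical name.

    const VISION_MODEL_REGEXES = [
      /vision/,
      /gpt-4/,
      /claude-3/,
      /gemini/,
      /^dall-e-3$/, // Matches exactly "dall-e-3"
      /o1/,
      /o3/,
      /o4/,
    ];
    const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];

    function isVisionModelSketch(model: string): boolean {
      // Exclusions win over positive matches.
      return (
        !EXCLUDE_VISION_MODEL_REGEXES.some((r) => r.test(model)) &&
        VISION_MODEL_REGEXES.some((r) => r.test(model))
      );
    }

    isVisionModelSketch("o4-mini"); // true, via /o4/
    isVisionModelSketch("gpt-4-turbo-preview"); // true, via /gpt-4/
    isVisionModelSketch("claude-3-5-haiku-20241022"); // false, excluded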