diff --git a/README.md b/README.md index 15c16eb68..7d3f7145c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ -

NextChat (ChatGPT Next Web)

+

NextChat

English / [简体中文](./README_CN.md) @@ -22,7 +22,6 @@ English / [简体中文](./README_CN.md) [![MacOS][MacOS-image]][download-url] [![Linux][Linux-image]][download-url] -[NextChatAI](https://nextchat.dev/chat?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) [NextChatAI](https://nextchat.club?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Discord](https://discord.gg/YCkeafCafC) / [Enterprise Edition](#enterprise-edition) / [Twitter](https://twitter.com/NextChatDev) diff --git a/app/client/api.ts b/app/client/api.ts index 64ac82b2a..f5288593d 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -40,6 +40,11 @@ export interface MultimodalContent { }; } +export interface MultimodalContentForAlibaba { + text?: string; + image?: string; +} + export interface RequestMessage { role: MessageRole; content: string | MultimodalContent[]; diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts index 88511768c..4875e5c02 100644 --- a/app/client/platforms/alibaba.ts +++ b/app/client/platforms/alibaba.ts @@ -7,7 +7,10 @@ import { ChatMessageTool, usePluginStore, } from "@/app/store"; -import { streamWithThink } from "@/app/utils/chat"; +import { + preProcessImageContentForAlibabaDashScope, + streamWithThink, +} from "@/app/utils/chat"; import { ChatOptions, getHeaders, @@ -15,12 +18,14 @@ import { LLMModel, SpeechOptions, MultimodalContent, + MultimodalContentForAlibaba, } from "../api"; import { getClientConfig } from "@/app/config/client"; import { getMessageTextContent, getMessageTextContentWithoutThinking, getTimeoutMSByModel, + isVisionModel, } from "@/app/utils"; import { fetch } from "@/app/utils/stream"; @@ -89,14 +94,6 @@ export class QwenApi implements LLMApi { } async chat(options: ChatOptions) { - const messages = options.messages.map((v) => ({ - role: v.role, - content: - v.role === "assistant" - ? getMessageTextContentWithoutThinking(v) - : getMessageTextContent(v), - })); - const modelConfig = { ...useAppConfig.getState().modelConfig, ...useChatStore.getState().currentSession().mask.modelConfig, @@ -105,6 +102,21 @@ export class QwenApi implements LLMApi { }, }; + const visionModel = isVisionModel(options.config.model); + + const messages: ChatOptions["messages"] = []; + for (const v of options.messages) { + const content = ( + visionModel + ? await preProcessImageContentForAlibabaDashScope(v.content) + : v.role === "assistant" + ? getMessageTextContentWithoutThinking(v) + : getMessageTextContent(v) + ) as any; + + messages.push({ role: v.role, content }); + } + const shouldStream = !!options.config.stream; const requestPayload: RequestPayload = { model: modelConfig.model, @@ -129,7 +141,7 @@ export class QwenApi implements LLMApi { "X-DashScope-SSE": shouldStream ? "enable" : "disable", }; - const chatPath = this.path(Alibaba.ChatPath); + const chatPath = this.path(Alibaba.ChatPath(modelConfig.model)); const chatPayload = { method: "POST", body: JSON.stringify(requestPayload), @@ -162,7 +174,7 @@ export class QwenApi implements LLMApi { const json = JSON.parse(text); const choices = json.output.choices as Array<{ message: { - content: string | null; + content: string | null | MultimodalContentForAlibaba[]; tool_calls: ChatMessageTool[]; reasoning_content: string | null; }; @@ -212,7 +224,9 @@ export class QwenApi implements LLMApi { } else if (content && content.length > 0) { return { isThinking: false, - content: content, + content: Array.isArray(content) + ? content.map((item) => item.text).join(",") + : content, }; } diff --git a/app/client/platforms/deepseek.ts b/app/client/platforms/deepseek.ts index b21d24cef..1b38b40cc 100644 --- a/app/client/platforms/deepseek.ts +++ b/app/client/platforms/deepseek.ts @@ -75,6 +75,25 @@ export class DeepSeekApi implements LLMApi { } } + // 检测并修复消息顺序,确保除system外的第一个消息是user + const filteredMessages: ChatOptions["messages"] = []; + let hasFoundFirstUser = false; + + for (const msg of messages) { + if (msg.role === "system") { + // Keep all system messages + filteredMessages.push(msg); + } else if (msg.role === "user") { + // User message directly added + filteredMessages.push(msg); + hasFoundFirstUser = true; + } else if (hasFoundFirstUser) { + // After finding the first user message, all subsequent non-system messages are retained. + filteredMessages.push(msg); + } + // If hasFoundFirstUser is false and it is not a system message, it will be skipped. + } + const modelConfig = { ...useAppConfig.getState().modelConfig, ...useChatStore.getState().currentSession().mask.modelConfig, @@ -85,7 +104,7 @@ export class DeepSeekApi implements LLMApi { }; const requestPayload: RequestPayload = { - messages, + messages: filteredMessages, stream: options.config.stream, model: modelConfig.model, temperature: modelConfig.temperature, diff --git a/app/components/emoji.tsx b/app/components/emoji.tsx index 1bf39ac1d..31d7f0ac6 100644 --- a/app/components/emoji.tsx +++ b/app/components/emoji.tsx @@ -66,11 +66,11 @@ export function Avatar(props: { model?: ModelType; avatar?: string }) { LlmIcon = BotIconGemma; } else if (modelName.startsWith("claude")) { LlmIcon = BotIconClaude; - } else if (modelName.toLowerCase().includes("llama")) { + } else if (modelName.includes("llama")) { LlmIcon = BotIconMeta; - } else if (modelName.startsWith("mixtral")) { + } else if (modelName.startsWith("mixtral") || modelName.startsWith("codestral")) { LlmIcon = BotIconMistral; - } else if (modelName.toLowerCase().includes("deepseek")) { + } else if (modelName.includes("deepseek")) { LlmIcon = BotIconDeepseek; } else if (modelName.startsWith("moonshot")) { LlmIcon = BotIconMoonshot; @@ -85,7 +85,7 @@ export function Avatar(props: { model?: ModelType; avatar?: string }) { } else if (modelName.startsWith("doubao") || modelName.startsWith("ep-")) { LlmIcon = BotIconDoubao; } else if ( - modelName.toLowerCase().includes("glm") || + modelName.includes("glm") || modelName.startsWith("cogview-") || modelName.startsWith("cogvideox-") ) { diff --git a/app/constant.ts b/app/constant.ts index 50aaf7921..422c42629 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -221,7 +221,12 @@ export const ByteDance = { export const Alibaba = { ExampleEndpoint: ALIBABA_BASE_URL, - ChatPath: "v1/services/aigc/text-generation/generation", + ChatPath: (modelName: string) => { + if (modelName.includes("vl") || modelName.includes("omni")) { + return "v1/services/aigc/multimodal-generation/generation"; + } + return `v1/services/aigc/text-generation/generation`; + }, }; export const Tencent = { @@ -412,6 +417,14 @@ export const KnowledgeCutOffDate: Record = { "gpt-4-turbo": "2023-12", "gpt-4-turbo-2024-04-09": "2023-12", "gpt-4-turbo-preview": "2023-12", + "gpt-4.1": "2024-06", + "gpt-4.1-2025-04-14": "2024-06", + "gpt-4.1-mini": "2024-06", + "gpt-4.1-mini-2025-04-14": "2024-06", + "gpt-4.1-nano": "2024-06", + "gpt-4.1-nano-2025-04-14": "2024-06", + "gpt-4.5-preview": "2023-10", + "gpt-4.5-preview-2025-02-27": "2023-10", "gpt-4o": "2023-10", "gpt-4o-2024-05-13": "2023-10", "gpt-4o-2024-08-06": "2023-10", @@ -453,6 +466,7 @@ export const DEFAULT_TTS_VOICES = [ export const VISION_MODEL_REGEXES = [ /vision/, /gpt-4o/, + /gpt-4\.1/, /claude-3/, /gemini-1\.5/, /gemini-exp/, @@ -480,6 +494,14 @@ const openaiModels = [ "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-preview", + "gpt-4.1", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano", + "gpt-4.1-nano-2025-04-14", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", "gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", @@ -535,6 +557,8 @@ const anthropicModels = [ "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-latest", + "claude-3-7-sonnet-20250219", + "claude-3-7-sonnet-latest", ]; const baiduModels = [ @@ -568,6 +592,9 @@ const alibabaModes = [ "qwen-max-0403", "qwen-max-0107", "qwen-max-longcontext", + "qwen-omni-turbo", + "qwen-vl-plus", + "qwen-vl-max", ]; const tencentModels = [ diff --git a/app/utils/chat.ts b/app/utils/chat.ts index efc496f2c..cae775512 100644 --- a/app/utils/chat.ts +++ b/app/utils/chat.ts @@ -3,7 +3,7 @@ import { UPLOAD_URL, REQUEST_TIMEOUT_MS, } from "@/app/constant"; -import { RequestMessage } from "@/app/client/api"; +import { MultimodalContent, RequestMessage } from "@/app/client/api"; import Locale from "@/app/locales"; import { EventStreamContentType, @@ -70,8 +70,9 @@ export function compressImage(file: Blob, maxSize: number): Promise { }); } -export async function preProcessImageContent( +export async function preProcessImageContentBase( content: RequestMessage["content"], + transformImageUrl: (url: string) => Promise<{ [key: string]: any }>, ) { if (typeof content === "string") { return content; @@ -81,7 +82,7 @@ export async function preProcessImageContent( if (part?.type == "image_url" && part?.image_url?.url) { try { const url = await cacheImageToBase64Image(part?.image_url?.url); - result.push({ type: part.type, image_url: { url } }); + result.push(await transformImageUrl(url)); } catch (error) { console.error("Error processing image URL:", error); } @@ -92,6 +93,23 @@ export async function preProcessImageContent( return result; } +export async function preProcessImageContent( + content: RequestMessage["content"], +) { + return preProcessImageContentBase(content, async (url) => ({ + type: "image_url", + image_url: { url }, + })) as Promise; +} + +export async function preProcessImageContentForAlibabaDashScope( + content: RequestMessage["content"], +) { + return preProcessImageContentBase(content, async (url) => ({ + image: url, + })); +} + const imageCaches: Record = {}; export function cacheImageToBase64Image(imageUrl: string) { if (imageUrl.includes(CACHE_URL_PREFIX)) { diff --git a/jest.config.ts b/jest.config.ts index cd25e8e7a..5f095dffb 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -15,6 +15,8 @@ const config: Config = { moduleNameMapper: { "^@/(.*)$": "/$1", }, + extensionsToTreatAsEsm: [".ts", ".tsx"], + injectGlobals: true, }; // createJestConfig is exported this way to ensure that next/jest can load the Next.js config which is async diff --git a/jest.setup.ts b/jest.setup.ts index bc515f9a1..9ba27fef2 100644 --- a/jest.setup.ts +++ b/jest.setup.ts @@ -1,24 +1,22 @@ // Learn more: https://github.com/testing-library/jest-dom import "@testing-library/jest-dom"; +import { jest } from "@jest/globals"; global.fetch = jest.fn(() => Promise.resolve({ ok: true, status: 200, - json: () => Promise.resolve({}), + json: () => Promise.resolve([]), headers: new Headers(), redirected: false, statusText: "OK", type: "basic", url: "", - clone: function () { - return this; - }, body: null, bodyUsed: false, arrayBuffer: () => Promise.resolve(new ArrayBuffer(0)), blob: () => Promise.resolve(new Blob()), formData: () => Promise.resolve(new FormData()), text: () => Promise.resolve(""), - }), + } as Response), ); diff --git a/package.json b/package.json index 0efe27b39..ceb92d7fc 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ "prompts": "node ./scripts/fetch-prompts.mjs", "prepare": "husky install", "proxy-dev": "sh ./scripts/init-proxy.sh && proxychains -f ./scripts/proxychains.conf yarn dev", - "test": "jest --watch", - "test:ci": "jest --ci" + "test": "node --no-warnings --experimental-vm-modules $(yarn bin jest) --watch", + "test:ci": "node --no-warnings --experimental-vm-modules $(yarn bin jest) --ci" }, "dependencies": { "@fortaine/fetch-event-source": "^3.0.6", diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts index 734e992d8..c25d92337 100644 --- a/test/vision-model-checker.test.ts +++ b/test/vision-model-checker.test.ts @@ -1,3 +1,4 @@ +import { jest } from "@jest/globals"; import { isVisionModel } from "../app/utils"; describe("isVisionModel", () => { @@ -50,7 +51,7 @@ describe("isVisionModel", () => { test("should identify models from VISION_MODELS env var", () => { process.env.VISION_MODELS = "custom-vision-model,another-vision-model"; - + expect(isVisionModel("custom-vision-model")).toBe(true); expect(isVisionModel("another-vision-model")).toBe(true); expect(isVisionModel("unrelated-model")).toBe(false); @@ -64,4 +65,4 @@ describe("isVisionModel", () => { expect(isVisionModel("unrelated-model")).toBe(false); expect(isVisionModel("gpt-4-vision")).toBe(true); }); -}); \ No newline at end of file +});