diff --git a/README.md b/README.md
index 15c16eb68..7d3f7145c 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
-NextChat (ChatGPT Next Web)
+NextChat
English / [简体中文](./README_CN.md)
@@ -22,7 +22,6 @@ English / [简体中文](./README_CN.md)
[![MacOS][MacOS-image]][download-url]
[![Linux][Linux-image]][download-url]
-[NextChatAI](https://nextchat.dev/chat?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases)
+[NextChatAI](https://nextchat.club?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Discord](https://discord.gg/YCkeafCafC) / [Enterprise Edition](#enterprise-edition) / [Twitter](https://twitter.com/NextChatDev)
diff --git a/app/client/api.ts b/app/client/api.ts
index 64ac82b2a..f5288593d 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -40,6 +40,11 @@ export interface MultimodalContent {
};
}
+export interface MultimodalContentForAlibaba {
+ text?: string;
+ image?: string;
+}
+
export interface RequestMessage {
role: MessageRole;
content: string | MultimodalContent[];
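
The new interface mirrors DashScope's flat message-part shape, which differs from the OpenAI-style tagged shape already modeled by `MultimodalContent`. A short sketch contrasting the two (illustrative values, not part of the diff):

```ts
import { MultimodalContent, MultimodalContentForAlibaba } from "@/app/client/api";

// OpenAI-style part: tagged with a "type" discriminator.
const openAiPart: MultimodalContent = {
  type: "image_url",
  image_url: { url: "data:image/png;base64,iVBOR..." },
};

// DashScope-style part: a flat object keyed by modality.
const dashScopePart: MultimodalContentForAlibaba = {
  image: "data:image/png;base64,iVBOR...",
};
```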
diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts
index 88511768c..4875e5c02 100644
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -7,7 +7,10 @@ import {
ChatMessageTool,
usePluginStore,
} from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import {
+ preProcessImageContentForAlibabaDashScope,
+ streamWithThink,
+} from "@/app/utils/chat";
import {
ChatOptions,
getHeaders,
@@ -15,12 +18,14 @@ import {
LLMModel,
SpeechOptions,
MultimodalContent,
+ MultimodalContentForAlibaba,
} from "../api";
import { getClientConfig } from "@/app/config/client";
import {
getMessageTextContent,
getMessageTextContentWithoutThinking,
getTimeoutMSByModel,
+ isVisionModel,
} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
}
async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content:
-        v.role === "assistant"
-          ? getMessageTextContentWithoutThinking(v)
-          : getMessageTextContent(v),
-    }));
-
const modelConfig = {
...useAppConfig.getState().modelConfig,
...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
},
};
+ const visionModel = isVisionModel(options.config.model);
+
+ const messages: ChatOptions["messages"] = [];
+ for (const v of options.messages) {
+ const content = (
+ visionModel
+ ? await preProcessImageContentForAlibabaDashScope(v.content)
+ : v.role === "assistant"
+ ? getMessageTextContentWithoutThinking(v)
+ : getMessageTextContent(v)
+ ) as any;
+
+ messages.push({ role: v.role, content });
+ }
+
const shouldStream = !!options.config.stream;
const requestPayload: RequestPayload = {
model: modelConfig.model,
@@ -129,7 +141,7 @@ export class QwenApi implements LLMApi {
"X-DashScope-SSE": shouldStream ? "enable" : "disable",
};
- const chatPath = this.path(Alibaba.ChatPath);
+ const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
const chatPayload = {
method: "POST",
body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@ export class QwenApi implements LLMApi {
const json = JSON.parse(text);
const choices = json.output.choices as Array<{
message: {
- content: string | null;
+ content: string | null | MultimodalContentForAlibaba[];
tool_calls: ChatMessageTool[];
reasoning_content: string | null;
};
@@ -212,7 +224,9 @@ export class QwenApi implements LLMApi {
} else if (content && content.length > 0) {
return {
isThinking: false,
- content: content,
+ content: Array.isArray(content)
+ ? content.map((item) => item.text).join(",")
+ : content,
};
}
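
The multimodal endpoint streams choices whose `message.content` is an array of parts rather than a plain string, which is why the parser grows an `Array.isArray` branch. A minimal sketch of that flattening (values invented for illustration):

```ts
import { MultimodalContentForAlibaba } from "@/app/client/api";

const parts: MultimodalContentForAlibaba[] = [
  { text: "The image shows" },
  { text: " a cat on a sofa." },
];

// Mirrors the new branch: text fields are joined with a comma.
const flattened = parts.map((item) => item.text).join(",");
// => "The image shows, a cat on a sofa."
```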
diff --git a/app/client/platforms/deepseek.ts b/app/client/platforms/deepseek.ts
index b21d24cef..1b38b40cc 100644
--- a/app/client/platforms/deepseek.ts
+++ b/app/client/platforms/deepseek.ts
@@ -75,6 +75,25 @@ export class DeepSeekApi implements LLMApi {
}
}
+    // Detect and fix the message order: ensure the first non-system message is from the user
+ const filteredMessages: ChatOptions["messages"] = [];
+ let hasFoundFirstUser = false;
+
+ for (const msg of messages) {
+ if (msg.role === "system") {
+ // Keep all system messages
+ filteredMessages.push(msg);
+ } else if (msg.role === "user") {
+        // Add user messages directly
+ filteredMessages.push(msg);
+ hasFoundFirstUser = true;
+ } else if (hasFoundFirstUser) {
+ // After finding the first user message, all subsequent non-system messages are retained.
+ filteredMessages.push(msg);
+ }
+ // If hasFoundFirstUser is false and it is not a system message, it will be skipped.
+ }
+
const modelConfig = {
...useAppConfig.getState().modelConfig,
...useChatStore.getState().currentSession().mask.modelConfig,
@@ -85,7 +104,7 @@ export class DeepSeekApi implements LLMApi {
};
const requestPayload: RequestPayload = {
- messages,
+ messages: filteredMessages,
stream: options.config.stream,
model: modelConfig.model,
temperature: modelConfig.temperature,
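
The filter keeps every system message, then drops everything else until the first user turn has been seen, so the first non-system message DeepSeek receives is always from the user. The same rule as a standalone function, with an assumed minimal message type:

```ts
// Sketch only: Msg is a stand-in for ChatOptions["messages"][number].
type Msg = { role: "system" | "user" | "assistant"; content: string };

function dropLeadingAssistantTurns(messages: Msg[]): Msg[] {
  const result: Msg[] = [];
  let seenUser = false;
  for (const msg of messages) {
    if (msg.role === "system") {
      result.push(msg); // system messages always pass through
    } else if (msg.role === "user") {
      result.push(msg);
      seenUser = true;
    } else if (seenUser) {
      result.push(msg); // other turns are kept only after the first user turn
    }
  }
  return result;
}

// Example: the assistant greeting before the first user turn is dropped.
dropLeadingAssistantTurns([
  { role: "system", content: "You are a helpful assistant." },
  { role: "assistant", content: "Hi, how can I help?" },
  { role: "user", content: "Summarize this article." },
]);
// => [system, user]
```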
diff --git a/app/components/emoji.tsx b/app/components/emoji.tsx
index 1bf39ac1d..31d7f0ac6 100644
--- a/app/components/emoji.tsx
+++ b/app/components/emoji.tsx
@@ -66,11 +66,11 @@ export function Avatar(props: { model?: ModelType; avatar?: string }) {
LlmIcon = BotIconGemma;
} else if (modelName.startsWith("claude")) {
LlmIcon = BotIconClaude;
- } else if (modelName.toLowerCase().includes("llama")) {
+ } else if (modelName.includes("llama")) {
LlmIcon = BotIconMeta;
- } else if (modelName.startsWith("mixtral")) {
+ } else if (modelName.startsWith("mixtral") || modelName.startsWith("codestral")) {
LlmIcon = BotIconMistral;
- } else if (modelName.toLowerCase().includes("deepseek")) {
+ } else if (modelName.includes("deepseek")) {
LlmIcon = BotIconDeepseek;
} else if (modelName.startsWith("moonshot")) {
LlmIcon = BotIconMoonshot;
@@ -85,7 +85,7 @@ export function Avatar(props: { model?: ModelType; avatar?: string }) {
} else if (modelName.startsWith("doubao") || modelName.startsWith("ep-")) {
LlmIcon = BotIconDoubao;
} else if (
- modelName.toLowerCase().includes("glm") ||
+ modelName.includes("glm") ||
modelName.startsWith("cogview-") ||
modelName.startsWith("cogvideox-")
) {
diff --git a/app/constant.ts b/app/constant.ts
index 50aaf7921..422c42629 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -221,7 +221,12 @@ export const ByteDance = {
export const Alibaba = {
ExampleEndpoint: ALIBABA_BASE_URL,
- ChatPath: "v1/services/aigc/text-generation/generation",
+ ChatPath: (modelName: string) => {
+ if (modelName.includes("vl") || modelName.includes("omni")) {
+ return "v1/services/aigc/multimodal-generation/generation";
+ }
+ return `v1/services/aigc/text-generation/generation`;
+ },
};
export const Tencent = {
@@ -412,6 +417,14 @@ export const KnowledgeCutOffDate: Record<string, string> = {
"gpt-4-turbo": "2023-12",
"gpt-4-turbo-2024-04-09": "2023-12",
"gpt-4-turbo-preview": "2023-12",
+ "gpt-4.1": "2024-06",
+ "gpt-4.1-2025-04-14": "2024-06",
+ "gpt-4.1-mini": "2024-06",
+ "gpt-4.1-mini-2025-04-14": "2024-06",
+ "gpt-4.1-nano": "2024-06",
+ "gpt-4.1-nano-2025-04-14": "2024-06",
+ "gpt-4.5-preview": "2023-10",
+ "gpt-4.5-preview-2025-02-27": "2023-10",
"gpt-4o": "2023-10",
"gpt-4o-2024-05-13": "2023-10",
"gpt-4o-2024-08-06": "2023-10",
@@ -453,6 +466,7 @@ export const DEFAULT_TTS_VOICES = [
export const VISION_MODEL_REGEXES = [
/vision/,
/gpt-4o/,
+ /gpt-4\.1/,
/claude-3/,
/gemini-1\.5/,
/gemini-exp/,
@@ -480,6 +494,14 @@ const openaiModels = [
"gpt-4-32k-0613",
"gpt-4-turbo",
"gpt-4-turbo-preview",
+ "gpt-4.1",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano",
+ "gpt-4.1-nano-2025-04-14",
+ "gpt-4.5-preview",
+ "gpt-4.5-preview-2025-02-27",
"gpt-4o",
"gpt-4o-2024-05-13",
"gpt-4o-2024-08-06",
@@ -535,6 +557,8 @@ const anthropicModels = [
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20241022",
"claude-3-5-sonnet-latest",
+ "claude-3-7-sonnet-20250219",
+ "claude-3-7-sonnet-latest",
];
const baiduModels = [
@@ -568,6 +592,9 @@ const alibabaModes = [
"qwen-max-0403",
"qwen-max-0107",
"qwen-max-longcontext",
+ "qwen-omni-turbo",
+ "qwen-vl-plus",
+ "qwen-vl-max",
];
const tencentModels = [
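
`ChatPath` is now a function of the model name, matching the call site in `app/client/platforms/alibaba.ts` above. Usage, per the definition in this hunk (note it is a substring match, so any model name containing "vl" or "omni" is routed to the multimodal endpoint):

```ts
import { Alibaba } from "@/app/constant";

Alibaba.ChatPath("qwen-vl-max");
// => "v1/services/aigc/multimodal-generation/generation"

Alibaba.ChatPath("qwen-omni-turbo");
// => "v1/services/aigc/multimodal-generation/generation"

Alibaba.ChatPath("qwen-max");
// => "v1/services/aigc/text-generation/generation"
```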
diff --git a/app/utils/chat.ts b/app/utils/chat.ts
index efc496f2c..cae775512 100644
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -3,7 +3,7 @@ import {
UPLOAD_URL,
REQUEST_TIMEOUT_MS,
} from "@/app/constant";
-import { RequestMessage } from "@/app/client/api";
+import { MultimodalContent, RequestMessage } from "@/app/client/api";
import Locale from "@/app/locales";
import {
EventStreamContentType,
@@ -70,8 +70,9 @@ export function compressImage(file: Blob, maxSize: number): Promise<string> {
});
}
-export async function preProcessImageContent(
+export async function preProcessImageContentBase(
content: RequestMessage["content"],
+ transformImageUrl: (url: string) => Promise<{ [key: string]: any }>,
) {
if (typeof content === "string") {
return content;
@@ -81,7 +82,7 @@ export async function preProcessImageContent(
if (part?.type == "image_url" && part?.image_url?.url) {
try {
const url = await cacheImageToBase64Image(part?.image_url?.url);
- result.push({ type: part.type, image_url: { url } });
+ result.push(await transformImageUrl(url));
} catch (error) {
console.error("Error processing image URL:", error);
}
@@ -92,6 +93,23 @@ export async function preProcessImageContent(
return result;
}
+export async function preProcessImageContent(
+ content: RequestMessage["content"],
+) {
+ return preProcessImageContentBase(content, async (url) => ({
+ type: "image_url",
+ image_url: { url },
+  })) as Promise<MultimodalContent[] | string>;
+}
+
+export async function preProcessImageContentForAlibabaDashScope(
+ content: RequestMessage["content"],
+) {
+ return preProcessImageContentBase(content, async (url) => ({
+ image: url,
+ }));
+}
+
const imageCaches: Record<string, string> = {};
export function cacheImageToBase64Image(imageUrl: string) {
if (imageUrl.includes(CACHE_URL_PREFIX)) {
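
`preProcessImageContentBase` factors the cache-and-convert walk out of `preProcessImageContent`; each wrapper only supplies the final shape of an image part. A usage sketch (hypothetical content, result shapes as read from the hunk above):

```ts
import {
  preProcessImageContent,
  preProcessImageContentForAlibabaDashScope,
} from "@/app/utils/chat";

// Hypothetical multimodal content: one text part, one image part.
const content = [
  { type: "text" as const, text: "What is in this picture?" },
  { type: "image_url" as const, image_url: { url: "https://example.com/cat.png" } },
];

await preProcessImageContent(content);
// image parts become { type: "image_url", image_url: { url: <cached base64 URL> } }

await preProcessImageContentForAlibabaDashScope(content);
// image parts become { image: <cached base64 URL> }
```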
diff --git a/jest.config.ts b/jest.config.ts
index cd25e8e7a..5f095dffb 100644
--- a/jest.config.ts
+++ b/jest.config.ts
@@ -15,6 +15,8 @@ const config: Config = {
moduleNameMapper: {
"^@/(.*)$": "/$1",
},
+ extensionsToTreatAsEsm: [".ts", ".tsx"],
+ injectGlobals: true,
};
// createJestConfig is exported this way to ensure that next/jest can load the Next.js config which is async
diff --git a/jest.setup.ts b/jest.setup.ts
index bc515f9a1..9ba27fef2 100644
--- a/jest.setup.ts
+++ b/jest.setup.ts
@@ -1,24 +1,22 @@
// Learn more: https://github.com/testing-library/jest-dom
import "@testing-library/jest-dom";
+import { jest } from "@jest/globals";
global.fetch = jest.fn(() =>
Promise.resolve({
ok: true,
status: 200,
- json: () => Promise.resolve({}),
+ json: () => Promise.resolve([]),
headers: new Headers(),
redirected: false,
statusText: "OK",
type: "basic",
url: "",
- clone: function () {
- return this;
- },
body: null,
bodyUsed: false,
arrayBuffer: () => Promise.resolve(new ArrayBuffer(0)),
blob: () => Promise.resolve(new Blob()),
formData: () => Promise.resolve(new FormData()),
text: () => Promise.resolve(""),
- }),
+ } as Response),
);
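
With `jest` imported explicitly from `@jest/globals` and the stub cast to `Response`, the mock type-checks under ESM. A hypothetical test exercising it (not part of the diff):

```ts
it("resolves the stubbed fetch response", async () => {
  const res = await fetch("https://example.com/api");
  expect(res.ok).toBe(true);
  expect(res.status).toBe(200);
  await expect(res.json()).resolves.toEqual([]); // json() now resolves to []
});
```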
diff --git a/package.json b/package.json
index 0efe27b39..ceb92d7fc 100644
--- a/package.json
+++ b/package.json
@@ -17,8 +17,8 @@
"prompts": "node ./scripts/fetch-prompts.mjs",
"prepare": "husky install",
"proxy-dev": "sh ./scripts/init-proxy.sh && proxychains -f ./scripts/proxychains.conf yarn dev",
- "test": "jest --watch",
- "test:ci": "jest --ci"
+ "test": "node --no-warnings --experimental-vm-modules $(yarn bin jest) --watch",
+ "test:ci": "node --no-warnings --experimental-vm-modules $(yarn bin jest) --ci"
},
"dependencies": {
"@fortaine/fetch-event-source": "^3.0.6",
diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts
index 734e992d8..c25d92337 100644
--- a/test/vision-model-checker.test.ts
+++ b/test/vision-model-checker.test.ts
@@ -1,3 +1,4 @@
+import { jest } from "@jest/globals";
import { isVisionModel } from "../app/utils";
describe("isVisionModel", () => {
@@ -50,7 +51,7 @@ describe("isVisionModel", () => {
test("should identify models from VISION_MODELS env var", () => {
process.env.VISION_MODELS = "custom-vision-model,another-vision-model";
-
+
expect(isVisionModel("custom-vision-model")).toBe(true);
expect(isVisionModel("another-vision-model")).toBe(true);
expect(isVisionModel("unrelated-model")).toBe(false);
@@ -64,4 +65,4 @@ describe("isVisionModel", () => {
expect(isVisionModel("unrelated-model")).toBe(false);
expect(isVisionModel("gpt-4-vision")).toBe(true);
});
-});
\ No newline at end of file
+});