diff --git a/.env.template b/.env.template
index 907ec9dfe..4efaa2ff8 100644
--- a/.env.template
+++ b/.env.template
@@ -73,6 +73,11 @@ ANTHROPIC_API_VERSION=
### anthropic claude Api url (optional)
ANTHROPIC_URL=
-
### (optional)
WHITE_WEBDAV_ENDPOINTS=
+
+### siliconflow Api key (optional)
+SILICONFLOW_API_KEY=
+
+### siliconflow Api url (optional)
+SILICONFLOW_URL=
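For orientation, here is a minimal sketch of how these two variables are expected to surface on the server side. The actual `app/config/server.ts` change is not part of this excerpt, so the field names below simply mirror the `serverConfig.siliconFlowApiKey` / `serverConfig.siliconFlowUrl` reads that appear later in this diff.

```ts
// Sketch only: not the real app/config/server.ts. Field names mirror the
// reads in app/api/auth.ts and app/api/siliconflow.ts below.
export const siliconFlowServerConfig = {
  siliconFlowUrl: process.env.SILICONFLOW_URL, // optional custom endpoint
  siliconFlowApiKey: process.env.SILICONFLOW_API_KEY, // optional server-side key
};
```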
diff --git a/LICENSE b/LICENSE
index 047f9431e..4864ab00d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2023-2024 Zhang Yifei
+Copyright (c) 2023-2025 NextChat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 63d7c35c2..15c16eb68 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,12 @@
-
+
+
NextChat (ChatGPT Next Web)
English / [简体中文](./README_CN.md)
@@ -13,8 +14,7 @@ English / [简体中文](./README_CN.md)

-
-One-Click to get a well-designed cross-platform ChatGPT web UI, with Claude, GPT4 & Gemini Pro support.
+✨ Light and Fast AI Assistant, with Claude, DeepSeek, GPT4 & Gemini Pro support.
[![Saas][Saas-image]][saas-url]
[![Web][Web-image]][web-url]
@@ -22,6 +22,7 @@ One-Click to get a well-designed cross-platform ChatGPT web UI, with Claude, GPT
[![MacOS][MacOS-image]][download-url]
[![Linux][Linux-image]][download-url]
+[NextChatAI](https://nextchat.dev/chat?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases)
[NextChatAI](https://nextchat.club?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Discord](https://discord.gg/YCkeafCafC) / [Enterprise Edition](#enterprise-edition) / [Twitter](https://twitter.com/NextChatDev)
@@ -34,9 +35,9 @@ One-Click to get a well-designed cross-platform ChatGPT web UI, with Claude, GPT
[MacOS-image]: https://img.shields.io/badge/-MacOS-black?logo=apple
[Linux-image]: https://img.shields.io/badge/-Linux-333?logo=ubuntu
-[Deploy on Vercel button](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FChatGPTNextWeb%2FChatGPT-Next-Web&env=OPENAI_API_KEY&env=CODE&project-name=nextchat&repository-name=NextChat) [Deploy on Zeabur button](https://zeabur.com/templates/ZBUEFA) [Open in Gitpod button](https://gitpod.io/#https://github.com/Yidadaa/ChatGPT-Next-Web) [BT Panel button](https://www.bt.cn/new/download.html)
+[Deploy on Zeabur button](https://zeabur.com/templates/ZBUEFA) [Deploy on Vercel button](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FChatGPTNextWeb%2FChatGPT-Next-Web&env=OPENAI_API_KEY&env=CODE&project-name=nextchat&repository-name=NextChat) [Open in Gitpod button](https://gitpod.io/#https://github.com/ChatGPTNextWeb/NextChat)
-[Monica sponsor banner](https://monica.im/?utm=nxcrp)
+[Monica sponsor banner](https://monica.im/?utm=nxcrp)
@@ -352,6 +353,13 @@ Customize Stability API url.
Enable MCP (Model Context Protocol) Feature
+### `SILICONFLOW_API_KEY` (optional)
+
+SiliconFlow API Key.
+
+### `SILICONFLOW_URL` (optional)
+
+Customize the SiliconFlow API URL.
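For reference, a typical `.env` entry might look like the lines below; both values are examples, and `SILICONFLOW_URL` can be left unset to fall back to the built-in default endpoint.

```
SILICONFLOW_API_KEY=sk-xxxx
SILICONFLOW_URL=https://api.siliconflow.cn
```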
## Requirements
diff --git a/README_CN.md b/README_CN.md
index 2d2b28e82..5576231cc 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -89,7 +89,7 @@ code1,code2,code3
### `OPENAI_API_KEY` (必填项)
-OpanAI 密钥,你在 openai 账户页面申请的 api key,使用英文逗号隔开多个 key,这样可以随机轮询这些 key。
+OpenAI 密钥,你在 openai 账户页面申请的 api key,使用英文逗号隔开多个 key,这样可以随机轮询这些 key。
### `CODE` (可选)
@@ -267,6 +267,13 @@ Stability API密钥
启用MCP(Model Context Protocol)功能
+### `SILICONFLOW_API_KEY` (optional)
+
+SiliconFlow API Key.
+
+### `SILICONFLOW_URL` (optional)
+
+Customize the SiliconFlow API URL.
## 开发
diff --git a/app/api/[provider]/[...path]/route.ts b/app/api/[provider]/[...path]/route.ts
index 3b5833d7e..8975bf971 100644
--- a/app/api/[provider]/[...path]/route.ts
+++ b/app/api/[provider]/[...path]/route.ts
@@ -11,6 +11,7 @@ import { handle as moonshotHandler } from "../../moonshot";
import { handle as stabilityHandler } from "../../stability";
import { handle as iflytekHandler } from "../../iflytek";
import { handle as deepseekHandler } from "../../deepseek";
+import { handle as siliconflowHandler } from "../../siliconflow";
import { handle as xaiHandler } from "../../xai";
import { handle as chatglmHandler } from "../../glm";
import { handle as proxyHandler } from "../../proxy";
@@ -47,6 +48,8 @@ async function handle(
return xaiHandler(req, { params });
case ApiPath.ChatGLM:
return chatglmHandler(req, { params });
+ case ApiPath.SiliconFlow:
+ return siliconflowHandler(req, { params });
case ApiPath.OpenAI:
return openaiHandler(req, { params });
default:
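As a quick illustration of what the new case matches (the URL shape is assumed from the catch-all `[provider]/[...path]` route this file lives in):

```ts
// Illustration only: how the catch-all segments split for a SiliconFlow call.
const url = "/api/siliconflow/v1/chat/completions";
const [, , provider, ...path] = url.split("/");
// provider === "siliconflow"             -> the new ApiPath.SiliconFlow branch
// path === ["v1", "chat", "completions"] -> forwarded to siliconflowHandler
```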
diff --git a/app/api/auth.ts b/app/api/auth.ts
index 1760c249c..8c78c70c8 100644
--- a/app/api/auth.ts
+++ b/app/api/auth.ts
@@ -101,6 +101,9 @@ export function auth(req: NextRequest, modelProvider: ModelProvider) {
case ModelProvider.ChatGLM:
systemApiKey = serverConfig.chatglmApiKey;
break;
+ case ModelProvider.SiliconFlow:
+ systemApiKey = serverConfig.siliconFlowApiKey;
+ break;
case ModelProvider.GPT:
default:
if (req.nextUrl.pathname.includes("azure/deployments")) {
diff --git a/app/api/openai.ts b/app/api/openai.ts
index 2b5deca8b..5bb3a5dcb 100644
--- a/app/api/openai.ts
+++ b/app/api/openai.ts
@@ -14,8 +14,12 @@ function getModels(remoteModelRes: OpenAIListModelResponse) {
if (config.disableGPT4) {
remoteModelRes.data = remoteModelRes.data.filter(
(m) =>
- !(m.id.startsWith("gpt-4") || m.id.startsWith("chatgpt-4o") || m.id.startsWith("o1")) ||
- m.id.startsWith("gpt-4o-mini"),
+ !(
+ m.id.startsWith("gpt-4") ||
+ m.id.startsWith("chatgpt-4o") ||
+ m.id.startsWith("o1") ||
+ m.id.startsWith("o3")
+ ) || m.id.startsWith("gpt-4o-mini"),
);
}
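A worked example of the updated filter may help; the model ids below are just sample values, but the predicate is the one from the hunk above.

```ts
const ids = [
  "gpt-4",
  "gpt-4o-mini",
  "chatgpt-4o-latest",
  "o1-mini",
  "o3-mini",
  "gpt-3.5-turbo",
];
const visible = ids.filter(
  (id) =>
    !(
      id.startsWith("gpt-4") ||
      id.startsWith("chatgpt-4o") ||
      id.startsWith("o1") ||
      id.startsWith("o3")
    ) || id.startsWith("gpt-4o-mini"),
);
// With DISABLE_GPT4 set, only ["gpt-4o-mini", "gpt-3.5-turbo"] survive;
// o3 models are now hidden alongside gpt-4, chatgpt-4o and o1.
```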
diff --git a/app/api/siliconflow.ts b/app/api/siliconflow.ts
new file mode 100644
index 000000000..e298a21d4
--- /dev/null
+++ b/app/api/siliconflow.ts
@@ -0,0 +1,128 @@
+import { getServerSideConfig } from "@/app/config/server";
+import {
+ SILICONFLOW_BASE_URL,
+ ApiPath,
+ ModelProvider,
+ ServiceProvider,
+} from "@/app/constant";
+import { prettyObject } from "@/app/utils/format";
+import { NextRequest, NextResponse } from "next/server";
+import { auth } from "@/app/api/auth";
+import { isModelNotavailableInServer } from "@/app/utils/model";
+
+const serverConfig = getServerSideConfig();
+
+export async function handle(
+ req: NextRequest,
+ { params }: { params: { path: string[] } },
+) {
+ console.log("[SiliconFlow Route] params ", params);
+
+ if (req.method === "OPTIONS") {
+ return NextResponse.json({ body: "OK" }, { status: 200 });
+ }
+
+ const authResult = auth(req, ModelProvider.SiliconFlow);
+ if (authResult.error) {
+ return NextResponse.json(authResult, {
+ status: 401,
+ });
+ }
+
+ try {
+ const response = await request(req);
+ return response;
+ } catch (e) {
+ console.error("[SiliconFlow] ", e);
+ return NextResponse.json(prettyObject(e));
+ }
+}
+
+async function request(req: NextRequest) {
+ const controller = new AbortController();
+
+ // use the configured base url and strip the /api/siliconflow prefix from the path
+ let path = `${req.nextUrl.pathname}`.replaceAll(ApiPath.SiliconFlow, "");
+
+ let baseUrl = serverConfig.siliconFlowUrl || SILICONFLOW_BASE_URL;
+
+ if (!baseUrl.startsWith("http")) {
+ baseUrl = `https://${baseUrl}`;
+ }
+
+ if (baseUrl.endsWith("/")) {
+ baseUrl = baseUrl.slice(0, -1);
+ }
+
+ console.log("[Proxy] ", path);
+ console.log("[Base Url]", baseUrl);
+
+ const timeoutId = setTimeout(
+ () => {
+ controller.abort();
+ },
+ 10 * 60 * 1000,
+ );
+
+ const fetchUrl = `${baseUrl}${path}`;
+ const fetchOptions: RequestInit = {
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: req.headers.get("Authorization") ?? "",
+ },
+ method: req.method,
+ body: req.body,
+ redirect: "manual",
+ // @ts-ignore
+ duplex: "half",
+ signal: controller.signal,
+ };
+
+ // #1815 try to refuse requests for models that are not allowed on this server
+ if (serverConfig.customModels && req.body) {
+ try {
+ const clonedBody = await req.text();
+ fetchOptions.body = clonedBody;
+
+ const jsonBody = JSON.parse(clonedBody) as { model?: string };
+
+ // reject with 403 below if the requested model is not available on this server
+ if (
+ isModelNotavailableInServer(
+ serverConfig.customModels,
+ jsonBody?.model as string,
+ ServiceProvider.SiliconFlow as string,
+ )
+ ) {
+ return NextResponse.json(
+ {
+ error: true,
+ message: `you are not allowed to use ${jsonBody?.model} model`,
+ },
+ {
+ status: 403,
+ },
+ );
+ }
+ } catch (e) {
+ console.error(`[SiliconFlow] filter`, e);
+ }
+ }
+ try {
+ const res = await fetch(fetchUrl, fetchOptions);
+
+ // to prevent browser prompt for credentials
+ const newHeaders = new Headers(res.headers);
+ newHeaders.delete("www-authenticate");
+ // to disable nginx buffering
+ newHeaders.set("X-Accel-Buffering", "no");
+
+ return new Response(res.body, {
+ status: res.status,
+ statusText: res.statusText,
+ headers: newHeaders,
+ });
+ } finally {
+ clearTimeout(timeoutId);
+ }
+}
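To make the flow concrete, here is a hedged sketch of the kind of client request this route proxies. The model id, key, and chat path are placeholders; the upstream URL is simply the `/v1/...` suffix left after the `ApiPath.SiliconFlow` prefix is stripped, appended to the configured base URL.

```ts
// Hypothetical call through the new proxy route (values are examples only;
// the real client builds the path from SiliconFlow.ChatPath).
const resp = await fetch("/api/siliconflow/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer sk-placeholder", // forwarded as-is to the upstream API
  },
  body: JSON.stringify({
    model: "deepseek-ai/DeepSeek-V3", // example id; CUSTOM_MODELS can veto it with a 403
    stream: true,
    messages: [{ role: "user", content: "Hello" }],
  }),
});
```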
diff --git a/app/client/api.ts b/app/client/api.ts
index 8f263763b..64ac82b2a 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -23,6 +23,7 @@ import { SparkApi } from "./platforms/iflytek";
import { DeepSeekApi } from "./platforms/deepseek";
import { XAIApi } from "./platforms/xai";
import { ChatGLMApi } from "./platforms/glm";
+import { SiliconflowApi } from "./platforms/siliconflow";
export const ROLES = ["system", "user", "assistant"] as const;
export type MessageRole = (typeof ROLES)[number];
@@ -164,6 +165,9 @@ export class ClientApi {
case ModelProvider.ChatGLM:
this.llm = new ChatGLMApi();
break;
+ case ModelProvider.SiliconFlow:
+ this.llm = new SiliconflowApi();
+ break;
default:
this.llm = new ChatGPTApi();
}
@@ -254,6 +258,8 @@ export function getHeaders(ignoreHeaders: boolean = false) {
const isDeepSeek = modelConfig.providerName === ServiceProvider.DeepSeek;
const isXAI = modelConfig.providerName === ServiceProvider.XAI;
const isChatGLM = modelConfig.providerName === ServiceProvider.ChatGLM;
+ const isSiliconFlow =
+ modelConfig.providerName === ServiceProvider.SiliconFlow;
const isEnabledAccessControl = accessStore.enabledAccessControl();
const apiKey = isGoogle
? accessStore.googleApiKey
@@ -273,6 +279,8 @@ export function getHeaders(ignoreHeaders: boolean = false) {
? accessStore.deepseekApiKey
: isChatGLM
? accessStore.chatglmApiKey
+ : isSiliconFlow
+ ? accessStore.siliconflowApiKey
: isIflytek
? accessStore.iflytekApiKey && accessStore.iflytekApiSecret
? accessStore.iflytekApiKey + ":" + accessStore.iflytekApiSecret
@@ -290,6 +298,7 @@ export function getHeaders(ignoreHeaders: boolean = false) {
isDeepSeek,
isXAI,
isChatGLM,
+ isSiliconFlow,
apiKey,
isEnabledAccessControl,
};
@@ -317,6 +326,7 @@ export function getHeaders(ignoreHeaders: boolean = false) {
isDeepSeek,
isXAI,
isChatGLM,
+ isSiliconFlow,
apiKey,
isEnabledAccessControl,
} = getConfig();
@@ -365,6 +375,8 @@ export function getClientApi(provider: ServiceProvider): ClientApi {
return new ClientApi(ModelProvider.XAI);
case ServiceProvider.ChatGLM:
return new ClientApi(ModelProvider.ChatGLM);
+ case ServiceProvider.SiliconFlow:
+ return new ClientApi(ModelProvider.SiliconFlow);
default:
return new ClientApi(ModelProvider.GPT);
}
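With the three registrations above (the `ModelProvider` case, the `ClientApi` case, and the key selection in `getHeaders`), resolving a SiliconFlow client looks like this sketch:

```ts
// Sketch of the resolution path added in this file.
import { getClientApi } from "@/app/client/api";
import { ServiceProvider } from "@/app/constant";

const api = getClientApi(ServiceProvider.SiliconFlow); // ClientApi backed by SiliconflowApi
// api.llm.chat(...) then goes through getHeaders(), which now picks
// accessStore.siliconflowApiKey when the active provider is SiliconFlow.
```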
diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts
index 6fe69e87a..88511768c 100644
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -1,12 +1,13 @@
"use client";
+import { ApiPath, Alibaba, ALIBABA_BASE_URL } from "@/app/constant";
import {
- ApiPath,
- Alibaba,
- ALIBABA_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-
+ useAccessStore,
+ useAppConfig,
+ useChatStore,
+ ChatMessageTool,
+ usePluginStore,
+} from "@/app/store";
+import { streamWithThink } from "@/app/utils/chat";
import {
ChatOptions,
getHeaders,
@@ -15,14 +16,12 @@ import {
SpeechOptions,
MultimodalContent,
} from "../api";
-import Locale from "../../locales";
-import {
- EventStreamContentType,
- fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import {
+ getMessageTextContent,
+ getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -92,7 +91,10 @@ export class QwenApi implements LLMApi {
async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
role: v.role,
- content: getMessageTextContent(v),
+ content:
+ v.role === "assistant"
+ ? getMessageTextContentWithoutThinking(v)
+ : getMessageTextContent(v),
}));
const modelConfig = {
@@ -122,134 +124,118 @@ export class QwenApi implements LLMApi {
options.onController?.(controller);
try {
+ const headers = {
+ ...getHeaders(),
+ "X-DashScope-SSE": shouldStream ? "enable" : "disable",
+ };
+
const chatPath = this.path(Alibaba.ChatPath);
const chatPayload = {
method: "POST",
body: JSON.stringify(requestPayload),
signal: controller.signal,
- headers: {
- ...getHeaders(),
- "X-DashScope-SSE": shouldStream ? "enable" : "disable",
- },
+ headers: headers,
};
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
- let responseText = "";
- let remainText = "";
- let finished = false;
- let responseRes: Response;
+ const [tools, funcs] = usePluginStore
+ .getState()
+ .getAsTools(
+ useChatStore.getState().currentSession().mask?.plugin || [],
+ );
+ return streamWithThink(
+ chatPath,
+ requestPayload,
+ headers,
+ tools as any,
+ funcs,
+ controller,
+ // parseSSE
+ (text: string, runTools: ChatMessageTool[]) => {
+ // console.log("parseSSE", text, runTools);
+ const json = JSON.parse(text);
+ const choices = json.output.choices as Array<{
+ message: {
+ content: string | null;
+ tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
+ };
+ }>;
- // animate response to make it looks smooth
- function animateResponseText() {
- if (finished || controller.signal.aborted) {
- responseText += remainText;
- console.log("[Response Animation] finished");
- if (responseText?.length === 0) {
- options.onError?.(new Error("empty response from server"));
- }
- return;
- }
+ if (!choices?.length) return { isThinking: false, content: "" };
- if (remainText.length > 0) {
- const fetchCount = Math.max(1, Math.round(remainText.length / 60));
- const fetchText = remainText.slice(0, fetchCount);
- responseText += fetchText;
- remainText = remainText.slice(fetchCount);
- options.onUpdate?.(responseText, fetchText);
- }
-
- requestAnimationFrame(animateResponseText);
- }
-
- // start animaion
- animateResponseText();
-
- const finish = () => {
- if (!finished) {
- finished = true;
- options.onFinish(responseText + remainText, responseRes);
- }
- };
-
- controller.signal.onabort = finish;
-
- fetchEventSource(chatPath, {
- fetch: fetch as any,
- ...chatPayload,
- async onopen(res) {
- clearTimeout(requestTimeoutId);
- const contentType = res.headers.get("content-type");
- console.log(
- "[Alibaba] request response content type: ",
- contentType,
- );
- responseRes = res;
-
- if (contentType?.startsWith("text/plain")) {
- responseText = await res.clone().text();
- return finish();
+ const tool_calls = choices[0]?.message?.tool_calls;
+ if (tool_calls?.length > 0) {
+ const index = tool_calls[0]?.index;
+ const id = tool_calls[0]?.id;
+ const args = tool_calls[0]?.function?.arguments;
+ if (id) {
+ runTools.push({
+ id,
+ type: tool_calls[0]?.type,
+ function: {
+ name: tool_calls[0]?.function?.name as string,
+ arguments: args,
+ },
+ });
+ } else {
+ // @ts-ignore
+ runTools[index]["function"]["arguments"] += args;
+ }
}
+ const reasoning = choices[0]?.message?.reasoning_content;
+ const content = choices[0]?.message?.content;
+
+ // Skip if both content and reasoning_content are empty or null
if (
- !res.ok ||
- !res.headers
- .get("content-type")
- ?.startsWith(EventStreamContentType) ||
- res.status !== 200
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
- const responseTexts = [responseText];
- let extraInfo = await res.clone().text();
- try {
- const resJson = await res.clone().json();
- extraInfo = prettyObject(resJson);
- } catch {}
-
- if (res.status === 401) {
- responseTexts.push(Locale.Error.Unauthorized);
- }
-
- if (extraInfo) {
- responseTexts.push(extraInfo);
- }
-
- responseText = responseTexts.join("\n\n");
-
- return finish();
+ return {
+ isThinking: false,
+ content: "",
+ };
}
- },
- onmessage(msg) {
- if (msg.data === "[DONE]" || finished) {
- return finish();
- }
- const text = msg.data;
- try {
- const json = JSON.parse(text);
- const choices = json.output.choices as Array<{
- message: { content: string };
- }>;
- const delta = choices[0]?.message?.content;
- if (delta) {
- remainText += delta;
- }
- } catch (e) {
- console.error("[Request] parse error", text, msg);
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
}
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
- onclose() {
- finish();
+ // processToolMessage, include tool_calls message and tool call results
+ (
+ requestPayload: RequestPayload,
+ toolCallMessage: any,
+ toolCallResult: any[],
+ ) => {
+ requestPayload?.input?.messages?.splice(
+ requestPayload?.input?.messages?.length,
+ 0,
+ toolCallMessage,
+ ...toolCallResult,
+ );
},
- onerror(e) {
- options.onError?.(e);
- throw e;
- },
- openWhenHidden: true,
- });
+ options,
+ );
} else {
const res = await fetch(chatPath, chatPayload);
clearTimeout(requestTimeoutId);
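The refactor above replaces the hand-rolled SSE loop with `streamWithThink` (the ByteDance and OpenAI clients below get the same treatment), whose parseSSE callback reduces every chunk to a small `{ isThinking, content }` result. A condensed sketch of that contract, mirroring the branches in the hunk:

```ts
// Not the real streamWithThink; just the shape its parseSSE callback returns.
interface ParsedChunk {
  isThinking: boolean; // true while the model is emitting reasoning_content
  content: string; // text appended to the thinking block or to the answer
}

function pickDelta(
  reasoning: string | null | undefined,
  content: string | null | undefined,
): ParsedChunk {
  if (reasoning && reasoning.length > 0) {
    return { isThinking: true, content: reasoning };
  }
  if (content && content.length > 0) {
    return { isThinking: false, content };
  }
  return { isThinking: false, content: "" }; // empty chunks are skipped
}
```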
diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts
index 9e8c2f139..dc990db41 100644
--- a/app/client/platforms/baidu.ts
+++ b/app/client/platforms/baidu.ts
@@ -1,10 +1,5 @@
"use client";
-import {
- ApiPath,
- Baidu,
- BAIDU_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, Baidu, BAIDU_BASE_URL } from "@/app/constant";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getAccessToken } from "@/app/utils/baidu";
@@ -23,7 +18,7 @@ import {
} from "@fortaine/fetch-event-source";
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import { getMessageTextContent, getTimeoutMSByModel } from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -155,7 +150,7 @@ export class ErnieApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
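`getTimeoutMSByModel` (imported from `app/utils`, not shown in this diff) replaces the fixed `REQUEST_TIMEOUT_MS` across all providers. Its body is outside this excerpt, so the sketch below only captures the presumed intent, with illustrative thresholds and an assumed default.

```ts
// Presumed behaviour only; the real helper lives in app/utils.
const REQUEST_TIMEOUT_MS = 60 * 1000; // the old fixed budget (assumed value)

function getTimeoutMSByModelSketch(model: string): number {
  // Reasoning and image models stream slowly, so they get a larger budget.
  const needsLongTimeout =
    model.startsWith("o1") ||
    model.startsWith("o3") ||
    model.includes("deepseek-r") ||
    model.includes("-thinking") ||
    model.startsWith("dall-e");
  return needsLongTimeout ? 5 * 60 * 1000 : REQUEST_TIMEOUT_MS;
}
```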
diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts
index a2f0660d8..f9524cba2 100644
--- a/app/client/platforms/bytedance.ts
+++ b/app/client/platforms/bytedance.ts
@@ -1,11 +1,12 @@
"use client";
+import { ApiPath, ByteDance, BYTEDANCE_BASE_URL } from "@/app/constant";
import {
- ApiPath,
- ByteDance,
- BYTEDANCE_BASE_URL,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
+ useAccessStore,
+ useAppConfig,
+ useChatStore,
+ ChatMessageTool,
+ usePluginStore,
+} from "@/app/store";
import {
ChatOptions,
@@ -15,14 +16,14 @@ import {
MultimodalContent,
SpeechOptions,
} from "../api";
-import Locale from "../../locales";
-import {
- EventStreamContentType,
- fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
+
+import { streamWithThink } from "@/app/utils/chat";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent } from "@/app/utils";
+import { preProcessImageContent } from "@/app/utils/chat";
+import {
+ getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
export interface OpenAIListModelResponse {
@@ -34,7 +35,7 @@ export interface OpenAIListModelResponse {
}>;
}
-interface RequestPayload {
+interface RequestPayloadForByteDance {
messages: {
role: "system" | "user" | "assistant";
content: string | MultimodalContent[];
@@ -84,10 +85,14 @@ export class DoubaoApi implements LLMApi {
}
async chat(options: ChatOptions) {
- const messages = options.messages.map((v) => ({
- role: v.role,
- content: getMessageTextContent(v),
- }));
+ const messages: ChatOptions["messages"] = [];
+ for (const v of options.messages) {
+ const content =
+ v.role === "assistant"
+ ? getMessageTextContentWithoutThinking(v)
+ : await preProcessImageContent(v.content);
+ messages.push({ role: v.role, content });
+ }
const modelConfig = {
...useAppConfig.getState().modelConfig,
@@ -98,7 +103,7 @@ export class DoubaoApi implements LLMApi {
};
const shouldStream = !!options.config.stream;
- const requestPayload: RequestPayload = {
+ const requestPayload: RequestPayloadForByteDance = {
messages,
stream: shouldStream,
model: modelConfig.model,
@@ -123,119 +128,101 @@ export class DoubaoApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
- let responseText = "";
- let remainText = "";
- let finished = false;
- let responseRes: Response;
+ const [tools, funcs] = usePluginStore
+ .getState()
+ .getAsTools(
+ useChatStore.getState().currentSession().mask?.plugin || [],
+ );
+ return streamWithThink(
+ chatPath,
+ requestPayload,
+ getHeaders(),
+ tools as any,
+ funcs,
+ controller,
+ // parseSSE
+ (text: string, runTools: ChatMessageTool[]) => {
+ // console.log("parseSSE", text, runTools);
+ const json = JSON.parse(text);
+ const choices = json.choices as Array<{
+ delta: {
+ content: string | null;
+ tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
+ };
+ }>;
- // animate response to make it looks smooth
- function animateResponseText() {
- if (finished || controller.signal.aborted) {
- responseText += remainText;
- console.log("[Response Animation] finished");
- if (responseText?.length === 0) {
- options.onError?.(new Error("empty response from server"));
- }
- return;
- }
-
- if (remainText.length > 0) {
- const fetchCount = Math.max(1, Math.round(remainText.length / 60));
- const fetchText = remainText.slice(0, fetchCount);
- responseText += fetchText;
- remainText = remainText.slice(fetchCount);
- options.onUpdate?.(responseText, fetchText);
- }
-
- requestAnimationFrame(animateResponseText);
- }
-
- // start animaion
- animateResponseText();
-
- const finish = () => {
- if (!finished) {
- finished = true;
- options.onFinish(responseText + remainText, responseRes);
- }
- };
-
- controller.signal.onabort = finish;
-
- fetchEventSource(chatPath, {
- fetch: fetch as any,
- ...chatPayload,
- async onopen(res) {
- clearTimeout(requestTimeoutId);
- const contentType = res.headers.get("content-type");
- console.log(
- "[ByteDance] request response content type: ",
- contentType,
- );
- responseRes = res;
- if (contentType?.startsWith("text/plain")) {
- responseText = await res.clone().text();
- return finish();
+ if (!choices?.length) return { isThinking: false, content: "" };
+
+ const tool_calls = choices[0]?.delta?.tool_calls;
+ if (tool_calls?.length > 0) {
+ const index = tool_calls[0]?.index;
+ const id = tool_calls[0]?.id;
+ const args = tool_calls[0]?.function?.arguments;
+ if (id) {
+ runTools.push({
+ id,
+ type: tool_calls[0]?.type,
+ function: {
+ name: tool_calls[0]?.function?.name as string,
+ arguments: args,
+ },
+ });
+ } else {
+ // @ts-ignore
+ runTools[index]["function"]["arguments"] += args;
+ }
}
+ const reasoning = choices[0]?.delta?.reasoning_content;
+ const content = choices[0]?.delta?.content;
+ // Skip if both content and reasoning_content are empty or null
if (
- !res.ok ||
- !res.headers
- .get("content-type")
- ?.startsWith(EventStreamContentType) ||
- res.status !== 200
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
- const responseTexts = [responseText];
- let extraInfo = await res.clone().text();
- try {
- const resJson = await res.clone().json();
- extraInfo = prettyObject(resJson);
- } catch {}
-
- if (res.status === 401) {
- responseTexts.push(Locale.Error.Unauthorized);
- }
-
- if (extraInfo) {
- responseTexts.push(extraInfo);
- }
-
- responseText = responseTexts.join("\n\n");
-
- return finish();
+ return {
+ isThinking: false,
+ content: "",
+ };
}
- },
- onmessage(msg) {
- if (msg.data === "[DONE]" || finished) {
- return finish();
- }
- const text = msg.data;
- try {
- const json = JSON.parse(text);
- const choices = json.choices as Array<{
- delta: { content: string };
- }>;
- const delta = choices[0]?.delta?.content;
- if (delta) {
- remainText += delta;
- }
- } catch (e) {
- console.error("[Request] parse error", text, msg);
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
}
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
- onclose() {
- finish();
+ // processToolMessage, include tool_calls message and tool call results
+ (
+ requestPayload: RequestPayloadForByteDance,
+ toolCallMessage: any,
+ toolCallResult: any[],
+ ) => {
+ requestPayload?.messages?.splice(
+ requestPayload?.messages?.length,
+ 0,
+ toolCallMessage,
+ ...toolCallResult,
+ );
},
- onerror(e) {
- options.onError?.(e);
- throw e;
- },
- openWhenHidden: true,
- });
+ options,
+ );
} else {
const res = await fetch(chatPath, chatPayload);
clearTimeout(requestTimeoutId);
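One small detail worth calling out from the processToolMessage callback above: `splice(length, 0, ...)` is simply an in-place append, so the tool-call message and its results land at the end of the payload object that `streamWithThink` already holds. A tiny self-contained illustration:

```ts
// Illustration of the append-by-splice used in processToolMessage.
const messages: Array<Record<string, unknown>> = [{ role: "user", content: "hi" }];
const toolCallMessage = { role: "assistant", tool_calls: [{ id: "call_1" }] };
const toolCallResult = [{ role: "tool", tool_call_id: "call_1", content: "42" }];

messages.splice(messages.length, 0, toolCallMessage, ...toolCallResult);
// messages now ends with the assistant tool_calls message followed by its result.
```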
diff --git a/app/client/platforms/deepseek.ts b/app/client/platforms/deepseek.ts
index 2bf3b2338..b21d24cef 100644
--- a/app/client/platforms/deepseek.ts
+++ b/app/client/platforms/deepseek.ts
@@ -1,11 +1,6 @@
"use client";
// azure and openai, using same models. so using same LLMApi.
-import {
- ApiPath,
- DEEPSEEK_BASE_URL,
- DeepSeek,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, DEEPSEEK_BASE_URL, DeepSeek } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -25,6 +20,7 @@ import { getClientConfig } from "@/app/config/client";
import {
getMessageTextContent,
getMessageTextContentWithoutThinking,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { RequestPayload } from "./openai";
import { fetch } from "@/app/utils/stream";
@@ -115,12 +111,10 @@ export class DeepSeekApi implements LLMApi {
headers: getHeaders(),
};
- // console.log(chatPayload);
-
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
@@ -171,8 +165,8 @@ export class DeepSeekApi implements LLMApi {
// Skip if both content and reasoning_content are empty or null
if (
- (!reasoning || reasoning.trim().length === 0) &&
- (!content || content.trim().length === 0)
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
) {
return {
isThinking: false,
@@ -180,12 +174,12 @@ export class DeepSeekApi implements LLMApi {
};
}
- if (reasoning && reasoning.trim().length > 0) {
+ if (reasoning && reasoning.length > 0) {
return {
isThinking: true,
content: reasoning,
};
- } else if (content && content.trim().length > 0) {
+ } else if (content && content.length > 0) {
return {
isThinking: false,
content: content,
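The only behavioural change here besides the timeout is dropping `.trim()` from the emptiness checks. An illustrative consequence (my reading of the intent, not stated in the diff): whitespace-only chunks are now forwarded instead of discarded, so streamed formatting such as blank lines is preserved.

```ts
// Illustrative only: how a whitespace-only streaming chunk is treated.
const chunk = "\n\n";
const keptBefore = chunk.trim().length > 0; // false -> chunk used to be skipped
const keptNow = chunk.length > 0; // true -> chunk is now appended to the output
```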
diff --git a/app/client/platforms/glm.ts b/app/client/platforms/glm.ts
index a8d1869e3..98b10277d 100644
--- a/app/client/platforms/glm.ts
+++ b/app/client/platforms/glm.ts
@@ -1,10 +1,5 @@
"use client";
-import {
- ApiPath,
- CHATGLM_BASE_URL,
- ChatGLM,
- REQUEST_TIMEOUT_MS,
-} from "@/app/constant";
+import { ApiPath, CHATGLM_BASE_URL, ChatGLM } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -21,7 +16,11 @@ import {
SpeechOptions,
} from "../api";
import { getClientConfig } from "@/app/config/client";
-import { getMessageTextContent, isVisionModel } from "@/app/utils";
+import {
+ getMessageTextContent,
+ isVisionModel,
+ getTimeoutMSByModel,
+} from "@/app/utils";
import { RequestPayload } from "./openai";
import { fetch } from "@/app/utils/stream";
import { preProcessImageContent } from "@/app/utils/chat";
@@ -191,7 +190,7 @@ export class ChatGLMApi implements LLMApi {
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (modelType === "image" || modelType === "video") {
diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts
index 5ca8e1071..654f0e3e4 100644
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -1,4 +1,4 @@
-import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, Google } from "@/app/constant";
import {
ChatOptions,
getHeaders,
@@ -22,6 +22,7 @@ import {
getMessageTextContent,
getMessageImages,
isVisionModel,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { preProcessImageContent } from "@/app/utils/chat";
import { nanoid } from "nanoid";
@@ -69,9 +70,16 @@ export class GeminiProApi implements LLMApi {
.join("\n\n");
};
+ let content = "";
+ if (Array.isArray(res)) {
+ res.map((item) => {
+ content += getTextFromParts(item?.candidates?.at(0)?.content?.parts);
+ });
+ }
+
return (
getTextFromParts(res?.candidates?.at(0)?.content?.parts) ||
- getTextFromParts(res?.at(0)?.candidates?.at(0)?.content?.parts) ||
+ content || //getTextFromParts(res?.at(0)?.candidates?.at(0)?.content?.parts) ||
res?.error?.message ||
""
);
@@ -190,10 +198,11 @@ export class GeminiProApi implements LLMApi {
headers: getHeaders(),
};
+ const isThinking = options.config.model.includes("-thinking");
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- REQUEST_TIMEOUT_MS,
+ getTimeoutMSByModel(options.config.model),
);
if (shouldStream) {
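The new branch in `extractMessage` concatenates text from every element when the non-streaming Gemini response is an array, instead of reading only the first element. A minimal sketch of the shape it now handles (field layout taken from the code above; the strings are examples):

```ts
const res = [
  { candidates: [{ content: { parts: [{ text: "Hello " }] } }] },
  { candidates: [{ content: { parts: [{ text: "world" }] } }] },
];

let content = "";
for (const item of res) {
  for (const part of item.candidates?.[0]?.content?.parts ?? []) {
    content += part.text ?? "";
  }
}
// content === "Hello world" -- previously only the first array element was read.
```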
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 5a110b84b..c6f3fc425 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -21,7 +21,7 @@ import {
preProcessImageContent,
uploadImage,
base64Image2Blob,
- stream,
+ streamWithThink,
} from "@/app/utils/chat";
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
import { ModelSize, DalleQuality, DalleStyle } from "@/app/typing";
@@ -41,6 +41,7 @@ import {
getMessageTextContent,
isVisionModel,
isDalle3 as _isDalle3,
+ getTimeoutMSByModel,
} from "@/app/utils";
import { fetch } from "@/app/utils/stream";
@@ -195,7 +196,9 @@ export class ChatGPTApi implements LLMApi {
let requestPayload: RequestPayload | DalleRequestPayload;
const isDalle3 = _isDalle3(options.config.model);
- const isO1 = options.config.model.startsWith("o1");
+ const isO1OrO3 =
+ options.config.model.startsWith("o1") ||
+ options.config.model.startsWith("o3");
if (isDalle3) {
const prompt = getMessageTextContent(
options.messages.slice(-1)?.pop() as any,
@@ -217,7 +220,7 @@ export class ChatGPTApi implements LLMApi {
const content = visionModel
? await preProcessImageContent(v.content)
: getMessageTextContent(v);
- if (!(isO1 && v.role === "system"))
+ if (!(isO1OrO3 && v.role === "system"))
messages.push({ role: v.role, content });
}
@@ -226,16 +229,16 @@ export class ChatGPTApi implements LLMApi {
messages,
stream: options.config.stream,
model: modelConfig.model,
- temperature: !isO1 ? modelConfig.temperature : 1,
- presence_penalty: !isO1 ? modelConfig.presence_penalty : 0,
- frequency_penalty: !isO1 ? modelConfig.frequency_penalty : 0,
- top_p: !isO1 ? modelConfig.top_p : 1,
+ temperature: !isO1OrO3 ? modelConfig.temperature : 1,
+ presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
+ frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
+ top_p: !isO1OrO3 ? modelConfig.top_p : 1,
// max_tokens: Math.max(modelConfig.max_tokens, 1024),
// Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
};
// o1/o3 use max_completion_tokens to control the number of generated tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
- if (isO1) {
+ if (isO1OrO3) {
requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
}
@@ -291,7 +294,7 @@ export class ChatGPTApi implements LLMApi {
useChatStore.getState().currentSession().mask?.plugin || [],
);
// console.log("getAsTools", tools, funcs);
- stream(
+ streamWithThink(
chatPath,
requestPayload,
getHeaders(),
@@ -306,8 +309,12 @@ export class ChatGPTApi implements LLMApi {
delta: {
content: string;
tool_calls: ChatMessageTool[];
+ reasoning_content: string | null;
};
}>;
+
+ if (!choices?.length) return { isThinking: false, content: "" };
+
const tool_calls = choices[0]?.delta?.tool_calls;
if (tool_calls?.length > 0) {
const id = tool_calls[0]?.id;
@@ -327,7 +334,37 @@ export class ChatGPTApi implements LLMApi {
runTools[index]["function"]["arguments"] += args;
}
}
- return choices[0]?.delta?.content;
+
+ const reasoning = choices[0]?.delta?.reasoning_content;
+ const content = choices[0]?.delta?.content;
+
+ // Skip if both content and reasoning_content are empty or null
+ if (
+ (!reasoning || reasoning.length === 0) &&
+ (!content || content.length === 0)
+ ) {
+ return {
+ isThinking: false,
+ content: "",
+ };
+ }
+
+ if (reasoning && reasoning.length > 0) {
+ return {
+ isThinking: true,
+ content: reasoning,
+ };
+ } else if (content && content.length > 0) {
+ return {
+ isThinking: false,
+ content: content,
+ };
+ }
+
+ return {
+ isThinking: false,
+ content: "",
+ };
},
// processToolMessage, include tool_calls message and tool call results
(
@@ -359,7 +396,7 @@ export class ChatGPTApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
- isDalle3 || isO1 ? REQUEST_TIMEOUT_MS * 4 : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
+ getTimeoutMSByModel(options.config.model),
);
const res = await fetch(chatPath, chatPayload);
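To recap the o1/o3 handling introduced above in one place (the model name, the user-side settings, and the token limit below are placeholders; the pinned values on the o1/o3 side are the ones from the code):

```ts
const model = "o3-mini"; // example model id
const isO1OrO3 = model.startsWith("o1") || model.startsWith("o3");

const payload: Record<string, unknown> = {
  model,
  temperature: isO1OrO3 ? 1 : 0.5, // the user's temperature is ignored for o1/o3
  presence_penalty: isO1OrO3 ? 0 : 0.2,
  frequency_penalty: isO1OrO3 ? 0 : 0.2,
  top_p: isO1OrO3 ? 1 : 0.9,
};
if (isO1OrO3) {
  payload["max_completion_tokens"] = 4000; // reasoning models take max_completion_tokens
}
// System messages are also skipped for o1/o3 when the messages array is built,
// and parseSSE now routes reasoning_content into the "thinking" channel.
```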
diff --git a/app/client/platforms/siliconflow.ts b/app/client/platforms/siliconflow.ts
new file mode 100644
index 000000000..34f0844c3
--- /dev/null
+++ b/app/client/platforms/siliconflow.ts
@@ -0,0 +1,287 @@
+"use client";
+// SiliconFlow exposes an OpenAI-compatible API, so this client reuses the same request/response shapes as the OpenAI LLMApi.
+import {
+ ApiPath,
+ SILICONFLOW_BASE_URL,
+ SiliconFlow,
+ DEFAULT_MODELS,
+} from "@/app/constant";
+import {
+ useAccessStore,
+ useAppConfig,
+ useChatStore,
+ ChatMessageTool,
+ usePluginStore,
+} from "@/app/store";
+import { preProcessImageContent, streamWithThink } from "@/app/utils/chat";
+import {
+ ChatOptions,
+ getHeaders,
+ LLMApi,
+ LLMModel,
+ SpeechOptions,
+} from "../api";
+import { getClientConfig } from "@/app/config/client";
+import {
+ getMessageTextContent,
+ getMessageTextContentWithoutThinking,
+ isVisionModel,
+ getTimeoutMSByModel,
+} from "@/app/utils";
+import { RequestPayload } from "./openai";
+
+import { fetch } from "@/app/utils/stream";
+export interface SiliconFlowListModelResponse {
+ object: string;
+ data: Array<{
+ id: string;
+ object: string;
+ root: string;
+ }>;
+}
+
+export class SiliconflowApi implements LLMApi {
+ private disableListModels = false;
+
+ path(path: string): string {
+ const accessStore = useAccessStore.getState();
+
+ let baseUrl = "";
+
+ if (accessStore.useCustomConfig) {
+ baseUrl = accessStore.siliconflowUrl;
+ }
+
+ if (baseUrl.length === 0) {
+ const isApp = !!getClientConfig()?.isApp;
+ const apiPath = ApiPath.SiliconFlow;
+ baseUrl = isApp ? SILICONFLOW_BASE_URL : apiPath;
+ }
+
+ if (baseUrl.endsWith("/")) {
+ baseUrl = baseUrl.slice(0, baseUrl.length - 1);
+ }
+ if (
+ !baseUrl.startsWith("http") &&
+ !baseUrl.startsWith(ApiPath.SiliconFlow)
+ ) {
+ baseUrl = "https://" + baseUrl;
+ }
+
+ console.log("[Proxy Endpoint] ", baseUrl, path);
+
+ return [baseUrl, path].join("/");
+ }
+
+ extractMessage(res: any) {
+ return res.choices?.at(0)?.message?.content ?? "";
+ }
+
+ speech(options: SpeechOptions): Promise