mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-05-19 04:00:16 +09:00
Merge pull request #6292 from Little-LittleProgrammer/feature/alibaba-omni-support
feat(alibaba): Added alibaba vision model and omni model support
This commit is contained in:
commit f7cde17919
@@ -40,6 +40,11 @@ export interface MultimodalContent {
   };
 }
 
+export interface MultimodalContentForAlibaba {
+  text?: string;
+  image?: string;
+}
+
 export interface RequestMessage {
   role: MessageRole;
   content: string | MultimodalContent[];
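The new MultimodalContentForAlibaba part type carries either a text or an image field. A minimal sketch, with purely illustrative values, of a vision-model message body built from such parts:

import { MultimodalContentForAlibaba } from "@/app/client/api";

// Illustrative only: a user turn for a DashScope vision model,
// expressed with the part type introduced above.
const visionParts: MultimodalContentForAlibaba[] = [
  { text: "What is in this picture?" },
  { image: "data:image/png;base64,iVBORw0KGgo..." },
];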
@@ -7,7 +7,10 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import {
+  preProcessImageContentForAlibabaDashScope,
+  streamWithThink,
+} from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -15,12 +18,14 @@ import {
   LLMModel,
   SpeechOptions,
   MultimodalContent,
+  MultimodalContentForAlibaba,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
   getTimeoutMSByModel,
+  isVisionModel,
 } from "@/app/utils";
 import { fetch } from "@/app/utils/stream";
 
@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content:
-        v.role === "assistant"
-          ? getMessageTextContentWithoutThinking(v)
-          : getMessageTextContent(v),
-    }));
-
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
       ...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
       },
     };
 
+    const visionModel = isVisionModel(options.config.model);
+
+    const messages: ChatOptions["messages"] = [];
+    for (const v of options.messages) {
+      const content = (
+        visionModel
+          ? await preProcessImageContentForAlibabaDashScope(v.content)
+          : v.role === "assistant"
+            ? getMessageTextContentWithoutThinking(v)
+            : getMessageTextContent(v)
+      ) as any;
+
+      messages.push({ role: v.role, content });
+    }
+
     const shouldStream = !!options.config.stream;
     const requestPayload: RequestPayload = {
       model: modelConfig.model,
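The loop above chooses one of three content shapes per message: DashScope multimodal content for vision models, text with the thinking segment stripped for assistant turns, and plain text otherwise. A standalone sketch of that routing, using only helpers that appear in this diff (toAlibabaContent itself is a hypothetical name):

import { RequestMessage } from "@/app/client/api";
import { preProcessImageContentForAlibabaDashScope } from "@/app/utils/chat";
import {
  getMessageTextContent,
  getMessageTextContentWithoutThinking,
} from "@/app/utils";

// Sketch of the per-message routing shown in the hunk above.
async function toAlibabaContent(v: RequestMessage, visionModel: boolean) {
  if (visionModel) {
    // Vision/omni models: image_url parts become DashScope-style parts.
    return preProcessImageContentForAlibabaDashScope(v.content);
  }
  // Assistant turns use the text-without-thinking helper; other roles keep plain text.
  return v.role === "assistant"
    ? getMessageTextContentWithoutThinking(v)
    : getMessageTextContent(v);
}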
@@ -129,7 +141,7 @@ export class QwenApi implements LLMApi {
       "X-DashScope-SSE": shouldStream ? "enable" : "disable",
     };
 
-    const chatPath = this.path(Alibaba.ChatPath);
+    const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
     const chatPayload = {
       method: "POST",
       body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@ export class QwenApi implements LLMApi {
           const json = JSON.parse(text);
           const choices = json.output.choices as Array<{
             message: {
-              content: string | null;
+              content: string | null | MultimodalContentForAlibaba[];
               tool_calls: ChatMessageTool[];
               reasoning_content: string | null;
             };
@@ -212,7 +224,9 @@ export class QwenApi implements LLMApi {
           } else if (content && content.length > 0) {
             return {
               isThinking: false,
-              content: content,
+              content: Array.isArray(content)
+                ? content.map((item) => item.text).join(",")
+                : content,
             };
           }
 
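Because the multimodal endpoint can stream a delta's content as an array of parts rather than a plain string, the handler now joins the parts' text fields. A hedged sketch of that normalization on its own (the helper name is illustrative):

import { MultimodalContentForAlibaba } from "@/app/client/api";

// Illustrative helper mirroring the change above: array-shaped deltas
// are reduced to one string, plain strings pass through untouched.
function normalizeDelta(
  content: string | MultimodalContentForAlibaba[],
): string {
  return Array.isArray(content)
    ? content.map((item) => item.text).join(",")
    : content;
}

// normalizeDelta([{ text: "A cat" }, { text: "on a mat" }]) === "A cat,on a mat"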
@@ -221,7 +221,12 @@ export const ByteDance = {
 
 export const Alibaba = {
   ExampleEndpoint: ALIBABA_BASE_URL,
-  ChatPath: "v1/services/aigc/text-generation/generation",
+  ChatPath: (modelName: string) => {
+    if (modelName.includes("vl") || modelName.includes("omni")) {
+      return "v1/services/aigc/multimodal-generation/generation";
+    }
+    return `v1/services/aigc/text-generation/generation`;
+  },
 };
 
 export const Tencent = {
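ChatPath is now a function of the model name: "vl" and "omni" models are routed to DashScope's multimodal-generation endpoint, everything else keeps the text-generation endpoint. A small usage sketch with model names taken from the hunk below:

import { Alibaba } from "@/app/constant";

Alibaba.ChatPath("qwen-vl-max");          // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-omni-turbo");      // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-max-longcontext"); // "v1/services/aigc/text-generation/generation"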
@@ -570,6 +575,9 @@ const alibabaModes = [
   "qwen-max-0403",
   "qwen-max-0107",
   "qwen-max-longcontext",
+  "qwen-omni-turbo",
+  "qwen-vl-plus",
+  "qwen-vl-max",
 ];
 
 const tencentModels = [
@@ -3,7 +3,7 @@ import {
   UPLOAD_URL,
   REQUEST_TIMEOUT_MS,
 } from "@/app/constant";
-import { RequestMessage } from "@/app/client/api";
+import { MultimodalContent, RequestMessage } from "@/app/client/api";
 import Locale from "@/app/locales";
 import {
   EventStreamContentType,
@@ -70,8 +70,9 @@ export function compressImage(file: Blob, maxSize: number): Promise<string> {
   });
 }
 
-export async function preProcessImageContent(
+export async function preProcessImageContentBase(
   content: RequestMessage["content"],
+  transformImageUrl: (url: string) => Promise<{ [key: string]: any }>,
 ) {
   if (typeof content === "string") {
     return content;
@@ -81,7 +82,7 @@ export async function preProcessImageContent(
     if (part?.type == "image_url" && part?.image_url?.url) {
       try {
         const url = await cacheImageToBase64Image(part?.image_url?.url);
-        result.push({ type: part.type, image_url: { url } });
+        result.push(await transformImageUrl(url));
       } catch (error) {
         console.error("Error processing image URL:", error);
       }
@@ -92,6 +93,23 @@ export async function preProcessImageContent(
   return result;
 }
 
+export async function preProcessImageContent(
+  content: RequestMessage["content"],
+) {
+  return preProcessImageContentBase(content, async (url) => ({
+    type: "image_url",
+    image_url: { url },
+  })) as Promise<MultimodalContent[] | string>;
+}
+
+export async function preProcessImageContentForAlibabaDashScope(
+  content: RequestMessage["content"],
+) {
+  return preProcessImageContentBase(content, async (url) => ({
+    image: url,
+  }));
+}
+
 const imageCaches: Record<string, string> = {};
 export function cacheImageToBase64Image(imageUrl: string) {
   if (imageUrl.includes(CACHE_URL_PREFIX)) {
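The refactor keeps the image-caching walk in a single preProcessImageContentBase and lets the transformImageUrl callback decide the per-image output shape: the OpenAI-compatible wrapper emits { type: "image_url", image_url: { url } } parts, while the DashScope wrapper emits { image: url } parts. A hedged sketch comparing the two wrappers on the same input (the data URL and logged shapes are illustrative):

import {
  preProcessImageContent,
  preProcessImageContentForAlibabaDashScope,
} from "@/app/utils/chat";

async function demo() {
  // One incoming image part, two output shapes (values illustrative).
  const input = [
    {
      type: "image_url" as const,
      image_url: { url: "data:image/png;base64,iVBORw0KGgo..." },
    },
  ];

  // Roughly: [{ type: "image_url", image_url: { url: "data:..." } }]
  const openaiParts = await preProcessImageContent(input);

  // Roughly: [{ image: "data:..." }]
  const dashscopeParts = await preProcessImageContentForAlibabaDashScope(input);

  console.log(openaiParts, dashscopeParts);
}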