feat(alibaba): Added alibaba vision model and omni model support

EvanWu 2025-02-24 20:18:07 +08:00
parent f5f3ce94f6
commit b709ee3983
4 changed files with 62 additions and 13 deletions

View File

@@ -40,6 +40,11 @@ export interface MultimodalContent {
   };
 }
+export interface MultimodalContentForAlibaba {
+  text?: string;
+  image?: string;
+}
 export interface RequestMessage {
   role: MessageRole;
   content: string | MultimodalContent[];
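For reference (not part of the diff): the existing MultimodalContent parts in this file are OpenAI-style, carrying a `type` discriminator and an `image_url` object, while the new interface models the bare `{ text }` / `{ image }` parts that, judging from this commit, the DashScope multimodal endpoint expects. A minimal sketch with an invented image payload:

// Illustrative only: content in the shape the new interface describes.
const dashScopeContent: MultimodalContentForAlibaba[] = [
  { image: "data:image/png;base64,iVBORw0KGgo..." },
  { text: "What is shown in this picture?" },
];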

View File

@@ -7,7 +7,10 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import {
+  preProcessImageContentForAlibabaDashScope,
+  streamWithThink,
+} from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
@@ -15,12 +18,14 @@ import {
   LLMModel,
   SpeechOptions,
   MultimodalContent,
+  MultimodalContentForAlibaba,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
   getMessageTextContentWithoutThinking,
   getTimeoutMSByModel,
+  isVisionModel,
 } from "@/app/utils";
 import { fetch } from "@/app/utils/stream";
@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
   }
   async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content:
-        v.role === "assistant"
-          ? getMessageTextContentWithoutThinking(v)
-          : getMessageTextContent(v),
-    }));
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
       ...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
       },
     };
+    const visionModel = isVisionModel(options.config.model);
+    const messages: ChatOptions["messages"] = [];
+    for (const v of options.messages) {
+      const content = (
+        visionModel
+          ? await preProcessImageContentForAlibabaDashScope(v.content)
+          : v.role === "assistant"
+            ? getMessageTextContentWithoutThinking(v)
+            : getMessageTextContent(v)
+      ) as any;
+      messages.push({ role: v.role, content });
+    }
     const shouldStream = !!options.config.stream;
     const requestPayload: RequestPayload = {
       model: modelConfig.model,
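To make the new branching concrete, here is a sketch of the two message shapes the rebuilt loop can push. The literals are invented, and it assumes `v.content` carries OpenAI-style parts when an image is attached:

// Vision model: content stays an array of DashScope-style parts
// produced by preProcessImageContentForAlibabaDashScope.
const visionMessage = {
  role: "user",
  content: [{ image: "data:image/jpeg;base64,..." }, { text: "Describe this photo." }],
};

// Text model: content is flattened to a plain string
// (assistant turns go through getMessageTextContentWithoutThinking).
const textMessage = {
  role: "assistant",
  content: "The photo shows a lighthouse at dusk.",
};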
@@ -129,7 +141,7 @@
       "X-DashScope-SSE": shouldStream ? "enable" : "disable",
     };
-    const chatPath = this.path(Alibaba.ChatPath);
+    const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
     const chatPayload = {
       method: "POST",
       body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@
             const json = JSON.parse(text);
             const choices = json.output.choices as Array<{
              message: {
-                content: string | null;
+                content: string | null | MultimodalContentForAlibaba[];
                tool_calls: ChatMessageTool[];
                reasoning_content: string | null;
              };
@@ -212,7 +224,9 @@
            } else if (content && content.length > 0) {
              return {
                isThinking: false,
-                content: content,
+                content: Array.isArray(content)
+                  ? content.map((item) => item.text).join(",")
+                  : content,
              };
            }
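Because the multimodal endpoint can stream array-valued content, the parser above flattens it by joining the `text` fields. A tiny sketch of that expression in isolation; the chunk value is invented, inferred from the MultimodalContentForAlibaba type rather than from the DashScope docs:

// Flattening an array-valued delta the same way the stream handler does.
const chunk: MultimodalContentForAlibaba[] = [{ text: "A" }, { text: "B" }];
const flattened = Array.isArray(chunk)
  ? chunk.map((item) => item.text).join(",")
  : chunk;
// flattened === "A,B"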

View File

@@ -221,7 +221,12 @@ export const ByteDance = {
 export const Alibaba = {
   ExampleEndpoint: ALIBABA_BASE_URL,
-  ChatPath: "v1/services/aigc/text-generation/generation",
+  ChatPath: (modelName: string) => {
+    if (modelName.includes("vl") || modelName.includes("omni")) {
+      return "v1/services/aigc/multimodal-generation/generation";
+    }
+    return `v1/services/aigc/text-generation/generation`;
+  },
 };
 export const Tencent = {
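The routing is driven purely by the model name. With the definition above:

// Illustrative calls against the new ChatPath helper:
Alibaba.ChatPath("qwen-vl-max");      // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-omni-turbo");  // "v1/services/aigc/multimodal-generation/generation"
Alibaba.ChatPath("qwen-max-0403");    // "v1/services/aigc/text-generation/generation"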
@@ -568,6 +573,9 @@ const alibabaModes = [
   "qwen-max-0403",
   "qwen-max-0107",
   "qwen-max-longcontext",
+  "qwen-omni-turbo",
+  "qwen-vl-plus",
+  "qwen-vl-max",
 ];
 const tencentModels = [

View File

@@ -92,6 +92,28 @@ export async function preProcessImageContent(
   return result;
 }
+export async function preProcessImageContentForAlibabaDashScope(
+  content: RequestMessage["content"],
+) {
+  if (typeof content === "string") {
+    return content;
+  }
+  const result = [];
+  for (const part of content) {
+    if (part?.type == "image_url" && part?.image_url?.url) {
+      try {
+        const url = await cacheImageToBase64Image(part?.image_url?.url);
+        result.push({ image: url });
+      } catch (error) {
+        console.error("Error processing image URL:", error);
+      }
+    } else {
+      result.push({ ...part });
+    }
+  }
+  return result;
+}
 const imageCaches: Record<string, string> = {};
 export function cacheImageToBase64Image(imageUrl: string) {
   if (imageUrl.includes(CACHE_URL_PREFIX)) {
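Putting the new helper to use: a sketch (the input array and URL are invented) of how OpenAI-style parts are converted. Only image_url parts are rewritten; other parts are shallow-copied unchanged, and the image URL is first turned into a base64 data URL by cacheImageToBase64Image:

async function demo() {
  const openAiStyleParts: MultimodalContent[] = [
    { type: "text", text: "Describe this image." },
    { type: "image_url", image_url: { url: "https://example.com/cat.png" } },
  ];
  const dashScopeParts =
    await preProcessImageContentForAlibabaDashScope(openAiStyleParts);
  // -> [
  //      { type: "text", text: "Describe this image." },
  //      { image: "data:image/png;base64,..." },
  //    ]
}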