feat: api supports image input

2025-05-25 23:20:19 +09:00 · 2023-12-05 13:44:15 +08:00 · 2023-12-05 13:44:15 +08:00 · 7f3d261fb2
commit 7f3d261fb2
parent 4c46de7d1d
4 changed files with 40 additions and 11 deletions
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -74,10 +74,28 @@ export class ChatGPTApi implements LLMApi {
  }
  async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
+    const messages = options.messages.map((v) => {
      let message: {
        role: string;
        content: { type: string; text?: string; image_url?: { url: string } }[];
      } = {
        role: v.role,
-      content: v.content,
+        content: [],
-    }));
+      };
      message.content.push({
        type: "text",
        text: v.content,
      });
      if (v.image_url) {
        message.content.push({
          type: "image_url",
          image_url: {
            url: v.image_url,
          },
        });
      }
      return message;
    });
    const modelConfig = {
      ...useAppConfig.getState().modelConfig,
@@ -95,6 +113,10 @@ export class ChatGPTApi implements LLMApi {
      presence_penalty: modelConfig.presence_penalty,
      frequency_penalty: modelConfig.frequency_penalty,
      top_p: modelConfig.top_p,
      max_tokens:
        modelConfig.model == "gpt-4-vision-preview"
          ? modelConfig.max_tokens
          : null,
      // max_tokens: Math.max(modelConfig.max_tokens, 1024),
      // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
    };
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -80,6 +80,7 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
  CHAT_PAGE_SIZE,
  LAST_INPUT_IMAGE_KEY,
  LAST_INPUT_KEY,
  Path,
  REQUEST_TIMEOUT_MS,
@@ -554,7 +555,8 @@ export function ChatActions(props: {
        />
        {config.pluginConfig.enable &&
-          /^gpt(?!.*03\d{2}$).*$/.test(currentModel) && (
+          /^gpt(?!.*03\d{2}$).*$/.test(currentModel) &&
          currentModel != "gpt-4-vision-preview" && (
            <ChatAction
              onClick={switchUsePlugins}
              text={
@@ -778,8 +780,11 @@ function _Chat() {
      return;
    }
    setIsLoading(true);
-    chatStore.onUserInput(userInput).then(() => setIsLoading(false));
+    chatStore
      .onUserInput(userInput, userImage.base64)
      .then(() => setIsLoading(false));
    localStorage.setItem(LAST_INPUT_KEY, userInput);
    localStorage.setItem(LAST_INPUT_IMAGE_KEY, userImage);
    setUserInput("");
    setPromptHints([]);
    setUserImage(null);
@@ -847,6 +852,7 @@ function _Chat() {
      !(e.metaKey || e.altKey || e.ctrlKey)
    ) {
      setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? "");
      setUserImage(localStorage.getItem(LAST_INPUT_IMAGE_KEY));
      e.preventDefault();
      return;
    }
@@ -1331,7 +1337,7 @@ function _Chat() {
                  )}
                  <div className={styles["chat-message-item"]}>
                    <Markdown
-                      imageBase64={isUser && userImage && userImage.base64}
+                      imageBase64={message.image_url}
                      content={message.content}
                      loading={
                        (message.preview || message.streaming) &&
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -57,6 +57,7 @@ export const NARROW_SIDEBAR_WIDTH = 100;
 export const ACCESS_CODE_PREFIX = "nk-";
 export const LAST_INPUT_KEY = "last-input";
 export const LAST_INPUT_IMAGE_KEY = "last-input-image";
 export const UNFINISHED_INPUT = (id: string) => "unfinished-input-" + id;
 export const STORAGE_KEY = "chatgpt-next-web";
--- a/app/store/chat.ts
+++ b/app/store/chat.ts
@@ -274,7 +274,7 @@ export const useChatStore = createPersistStore(
        get().summarizeSession();
      },
-      async onUserInput(content: string) {
+      async onUserInput(content: string, image_url?: string) {
        const session = get().currentSession();
        const modelConfig = session.mask.modelConfig;
@@ -284,8 +284,8 @@ export const useChatStore = createPersistStore(
        const userMessage: ChatMessage = createMessage({
          role: "user",
          content: userContent,
          image_url: image_url,
        });
        const botMessage: ChatMessage = createMessage({
          role: "assistant",
          streaming: true,
@@ -319,11 +319,11 @@ export const useChatStore = createPersistStore(
          session.messages.push(savedUserMessage);
          session.messages.push(botMessage);
        });
        if (
          config.pluginConfig.enable &&
          session.mask.usePlugins &&
-          allPlugins.length > 0
+          allPlugins.length > 0 &&
          modelConfig.model != "gpt-4-vision-preview"
        ) {
          console.log("[ToolAgent] start");
          const pluginToolNames = allPlugins.map((m) => m.toolName);