feat: api supports image input

This commit is contained in:
Hk-Gosuto 2023-12-05 13:44:15 +08:00
parent 4c46de7d1d
commit 7f3d261fb2
4 changed files with 40 additions and 11 deletions

View File

@ -74,10 +74,28 @@ export class ChatGPTApi implements LLMApi {
}
async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({
role: v.role,
content: v.content,
}));
const messages = options.messages.map((v) => {
let message: {
role: string;
content: { type: string; text?: string; image_url?: { url: string } }[];
} = {
role: v.role,
content: [],
};
message.content.push({
type: "text",
text: v.content,
});
if (v.image_url) {
message.content.push({
type: "image_url",
image_url: {
url: v.image_url,
},
});
}
return message;
});
const modelConfig = {
...useAppConfig.getState().modelConfig,
@ -95,6 +113,10 @@ export class ChatGPTApi implements LLMApi {
presence_penalty: modelConfig.presence_penalty,
frequency_penalty: modelConfig.frequency_penalty,
top_p: modelConfig.top_p,
max_tokens:
modelConfig.model == "gpt-4-vision-preview"
? modelConfig.max_tokens
: null,
// max_tokens: Math.max(modelConfig.max_tokens, 1024),
// Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
};

View File

@ -80,6 +80,7 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
LAST_INPUT_IMAGE_KEY,
LAST_INPUT_KEY,
Path,
REQUEST_TIMEOUT_MS,
@ -554,7 +555,8 @@ export function ChatActions(props: {
/>
{config.pluginConfig.enable &&
/^gpt(?!.*03\d{2}$).*$/.test(currentModel) && (
/^gpt(?!.*03\d{2}$).*$/.test(currentModel) &&
currentModel != "gpt-4-vision-preview" && (
<ChatAction
onClick={switchUsePlugins}
text={
@ -778,8 +780,11 @@ function _Chat() {
return;
}
setIsLoading(true);
chatStore.onUserInput(userInput).then(() => setIsLoading(false));
chatStore
.onUserInput(userInput, userImage.base64)
.then(() => setIsLoading(false));
localStorage.setItem(LAST_INPUT_KEY, userInput);
localStorage.setItem(LAST_INPUT_IMAGE_KEY, userImage);
setUserInput("");
setPromptHints([]);
setUserImage(null);
@ -847,6 +852,7 @@ function _Chat() {
!(e.metaKey || e.altKey || e.ctrlKey)
) {
setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? "");
setUserImage(localStorage.getItem(LAST_INPUT_IMAGE_KEY));
e.preventDefault();
return;
}
@ -1331,7 +1337,7 @@ function _Chat() {
)}
<div className={styles["chat-message-item"]}>
<Markdown
imageBase64={isUser && userImage && userImage.base64}
imageBase64={message.image_url}
content={message.content}
loading={
(message.preview || message.streaming) &&

View File

@ -57,6 +57,7 @@ export const NARROW_SIDEBAR_WIDTH = 100;
/** String prefix "nk-"; presumably marks access-code style credentials — confirm against callers. */
export const ACCESS_CODE_PREFIX = "nk-";

/** localStorage key under which the most recently submitted chat input text is saved. */
export const LAST_INPUT_KEY: string = "last-input";

/** localStorage key under which the image attached to the most recent chat input is saved. */
export const LAST_INPUT_IMAGE_KEY: string = "last-input-image";

/**
 * Builds the key for an unfinished (draft) input.
 *
 * @param id - identifier appended to the fixed "unfinished-input-" prefix
 *   (presumably a session id — confirm against callers).
 * @returns the prefix followed by `id`.
 */
export const UNFINISHED_INPUT = (id: string) => `unfinished-input-${id}`;

/** Application-wide storage key; NOTE(review): looks like the root key for persisted state — confirm. */
export const STORAGE_KEY = "chatgpt-next-web";

View File

@ -274,7 +274,7 @@ export const useChatStore = createPersistStore(
get().summarizeSession();
},
async onUserInput(content: string) {
async onUserInput(content: string, image_url?: string) {
const session = get().currentSession();
const modelConfig = session.mask.modelConfig;
@ -284,8 +284,8 @@ export const useChatStore = createPersistStore(
const userMessage: ChatMessage = createMessage({
role: "user",
content: userContent,
image_url: image_url,
});
const botMessage: ChatMessage = createMessage({
role: "assistant",
streaming: true,
@ -319,11 +319,11 @@ export const useChatStore = createPersistStore(
session.messages.push(savedUserMessage);
session.messages.push(botMessage);
});
if (
config.pluginConfig.enable &&
session.mask.usePlugins &&
allPlugins.length > 0
allPlugins.length > 0 &&
modelConfig.model != "gpt-4-vision-preview"
) {
console.log("[ToolAgent] start");
const pluginToolNames = allPlugins.map((m) => m.toolName);