feat: api supports image input

This commit is contained in:
Hk-Gosuto 2023-12-05 13:44:15 +08:00
parent 4c46de7d1d
commit 7f3d261fb2
4 changed files with 40 additions and 11 deletions

View File

@ -74,10 +74,28 @@ export class ChatGPTApi implements LLMApi {
} }
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({ const messages = options.messages.map((v) => {
let message: {
role: string;
content: { type: string; text?: string; image_url?: { url: string } }[];
} = {
role: v.role, role: v.role,
content: v.content, content: [],
})); };
message.content.push({
type: "text",
text: v.content,
});
if (v.image_url) {
message.content.push({
type: "image_url",
image_url: {
url: v.image_url,
},
});
}
return message;
});
const modelConfig = { const modelConfig = {
...useAppConfig.getState().modelConfig, ...useAppConfig.getState().modelConfig,
@ -95,6 +113,10 @@ export class ChatGPTApi implements LLMApi {
presence_penalty: modelConfig.presence_penalty, presence_penalty: modelConfig.presence_penalty,
frequency_penalty: modelConfig.frequency_penalty, frequency_penalty: modelConfig.frequency_penalty,
top_p: modelConfig.top_p, top_p: modelConfig.top_p,
max_tokens:
modelConfig.model == "gpt-4-vision-preview"
? modelConfig.max_tokens
: null,
// max_tokens: Math.max(modelConfig.max_tokens, 1024), // max_tokens: Math.max(modelConfig.max_tokens, 1024),
// Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore. // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
}; };

View File

@ -80,6 +80,7 @@ import {
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import { import {
CHAT_PAGE_SIZE, CHAT_PAGE_SIZE,
LAST_INPUT_IMAGE_KEY,
LAST_INPUT_KEY, LAST_INPUT_KEY,
Path, Path,
REQUEST_TIMEOUT_MS, REQUEST_TIMEOUT_MS,
@ -554,7 +555,8 @@ export function ChatActions(props: {
/> />
{config.pluginConfig.enable && {config.pluginConfig.enable &&
/^gpt(?!.*03\d{2}$).*$/.test(currentModel) && ( /^gpt(?!.*03\d{2}$).*$/.test(currentModel) &&
currentModel != "gpt-4-vision-preview" && (
<ChatAction <ChatAction
onClick={switchUsePlugins} onClick={switchUsePlugins}
text={ text={
@ -778,8 +780,11 @@ function _Chat() {
return; return;
} }
setIsLoading(true); setIsLoading(true);
chatStore.onUserInput(userInput).then(() => setIsLoading(false)); chatStore
.onUserInput(userInput, userImage.base64)
.then(() => setIsLoading(false));
localStorage.setItem(LAST_INPUT_KEY, userInput); localStorage.setItem(LAST_INPUT_KEY, userInput);
localStorage.setItem(LAST_INPUT_IMAGE_KEY, userImage);
setUserInput(""); setUserInput("");
setPromptHints([]); setPromptHints([]);
setUserImage(null); setUserImage(null);
@ -847,6 +852,7 @@ function _Chat() {
!(e.metaKey || e.altKey || e.ctrlKey) !(e.metaKey || e.altKey || e.ctrlKey)
) { ) {
setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? ""); setUserInput(localStorage.getItem(LAST_INPUT_KEY) ?? "");
setUserImage(localStorage.getItem(LAST_INPUT_IMAGE_KEY));
e.preventDefault(); e.preventDefault();
return; return;
} }
@ -1331,7 +1337,7 @@ function _Chat() {
)} )}
<div className={styles["chat-message-item"]}> <div className={styles["chat-message-item"]}>
<Markdown <Markdown
imageBase64={isUser && userImage && userImage.base64} imageBase64={message.image_url}
content={message.content} content={message.content}
loading={ loading={
(message.preview || message.streaming) && (message.preview || message.streaming) &&

View File

@ -57,6 +57,7 @@ export const NARROW_SIDEBAR_WIDTH = 100;
export const ACCESS_CODE_PREFIX = "nk-"; export const ACCESS_CODE_PREFIX = "nk-";
export const LAST_INPUT_KEY = "last-input"; export const LAST_INPUT_KEY = "last-input";
export const LAST_INPUT_IMAGE_KEY = "last-input-image";
export const UNFINISHED_INPUT = (id: string) => "unfinished-input-" + id; export const UNFINISHED_INPUT = (id: string) => "unfinished-input-" + id;
export const STORAGE_KEY = "chatgpt-next-web"; export const STORAGE_KEY = "chatgpt-next-web";

View File

@ -274,7 +274,7 @@ export const useChatStore = createPersistStore(
get().summarizeSession(); get().summarizeSession();
}, },
async onUserInput(content: string) { async onUserInput(content: string, image_url?: string) {
const session = get().currentSession(); const session = get().currentSession();
const modelConfig = session.mask.modelConfig; const modelConfig = session.mask.modelConfig;
@ -284,8 +284,8 @@ export const useChatStore = createPersistStore(
const userMessage: ChatMessage = createMessage({ const userMessage: ChatMessage = createMessage({
role: "user", role: "user",
content: userContent, content: userContent,
image_url: image_url,
}); });
const botMessage: ChatMessage = createMessage({ const botMessage: ChatMessage = createMessage({
role: "assistant", role: "assistant",
streaming: true, streaming: true,
@ -319,11 +319,11 @@ export const useChatStore = createPersistStore(
session.messages.push(savedUserMessage); session.messages.push(savedUserMessage);
session.messages.push(botMessage); session.messages.push(botMessage);
}); });
if ( if (
config.pluginConfig.enable && config.pluginConfig.enable &&
session.mask.usePlugins && session.mask.usePlugins &&
allPlugins.length > 0 allPlugins.length > 0 &&
modelConfig.model != "gpt-4-vision-preview"
) { ) {
console.log("[ToolAgent] start"); console.log("[ToolAgent] start");
const pluginToolNames = allPlugins.map((m) => m.toolName); const pluginToolNames = allPlugins.map((m) => m.toolName);