diff --git a/adapter/azure/image.go b/adapter/azure/image.go index ec3efa0..6ff11cf 100644 --- a/adapter/azure/image.go +++ b/adapter/azure/image.go @@ -20,36 +20,41 @@ func (c *ChatInstance) GetImageEndpoint(model string) string { return fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", c.GetResource(), model, c.GetEndpoint()) } -// CreateImageRequest will create a dalle image from prompt, return url of image and error -func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) { +// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error +func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) { res, err := utils.Post( c.GetImageEndpoint(props.Model), c.GetHeader(), ImageRequest{ Prompt: props.Prompt, Size: utils.Multi[ImageSize]( - props.Model == globals.Dalle3, + props.Model == globals.Dalle3 || props.Model == globals.GPTImage1, ImageSize1024, ImageSize512, ), N: 1, }, props.Proxy) if err != nil || res == nil { - return "", fmt.Errorf("openai error: %s", err.Error()) + return "", "", fmt.Errorf("openai error: %s", err.Error()) } data := utils.MapToStruct[ImageResponse](res) if data == nil { - return "", fmt.Errorf("openai error: cannot parse response") + return "", "", fmt.Errorf("openai error: cannot parse response") } else if data.Error.Message != "" { - return "", fmt.Errorf("openai error: %s", data.Error.Message) + return "", "", fmt.Errorf("openai error: %s", data.Error.Message) } - return data.Data[0].Url, nil + // for gpt-image-1, return base64 data if available + if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" { + return "", data.Data[0].B64Json, nil + } + + return data.Data[0].Url, "", nil } // CreateImage will create a dalle image from prompt, return markdown of image func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) { - url, err := c.CreateImageRequest(ImageProps{ + url, b64Json, err := c.CreateImageRequest(ImageProps{ Model: props.Model, Prompt: c.GetLatestPrompt(props), Proxy: props.Proxy, @@ -61,5 +66,9 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro return "", err } + if b64Json != "" { + return utils.GetBase64ImageMarkdown(b64Json), nil + } + return utils.GetImageMarkdown(url), nil } diff --git a/adapter/azure/types.go b/adapter/azure/types.go index 96ed1e4..b895550 100644 --- a/adapter/azure/types.go +++ b/adapter/azure/types.go @@ -26,16 +26,17 @@ type Message struct { // ChatRequest is the request body for openai type ChatRequest struct { - Model string `json:"model"` - Messages interface{} `json:"messages"` - MaxToken *int `json:"max_tokens,omitempty"` - Stream bool `json:"stream"` - PresencePenalty *float32 `json:"presence_penalty,omitempty"` - FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"` - Temperature *float32 `json:"temperature,omitempty"` - TopP *float32 `json:"top_p,omitempty"` - Tools *globals.FunctionTools `json:"tools,omitempty"` - ToolChoice *interface{} `json:"tool_choice,omitempty"` // string or object + Model string `json:"model"` + Messages interface{} `json:"messages"` + MaxToken *int `json:"max_tokens,omitempty"` + MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"` + Stream bool `json:"stream"` + PresencePenalty *float32 `json:"presence_penalty,omitempty"` + FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"` + Temperature *float32 `json:"temperature,omitempty"` + TopP *float32 `json:"top_p,omitempty"` + Tools *globals.FunctionTools `json:"tools,omitempty"` + ToolChoice *interface{} `json:"tool_choice,omitempty"` // string or object } // CompletionRequest is the request body for openai completion @@ -106,11 +107,21 @@ type ImageRequest struct { type ImageResponse struct { Data []struct { - Url string `json:"url"` + Url string `json:"url,omitempty"` + B64Json string `json:"b64_json,omitempty"` } `json:"data"` Error struct { Message string `json:"message"` } `json:"error"` + Usage *struct { + InputTokens int `json:"input_tokens"` + InputTokensDetails struct { + ImageTokens int `json:"image_tokens"` + TextTokens int `json:"text_tokens"` + } `json:"input_tokens_details"` + OutputTokens int `json:"output_tokens"` + TotalTokens int `json:"total_tokens"` + } `json:"usage,omitempty"` } var ( diff --git a/adapter/openai/image.go b/adapter/openai/image.go index 93c0edd..a32b8cd 100644 --- a/adapter/openai/image.go +++ b/adapter/openai/image.go @@ -19,37 +19,42 @@ func (c *ChatInstance) GetImageEndpoint() string { return fmt.Sprintf("%s/v1/images/generations", c.GetEndpoint()) } -// CreateImageRequest will create a dalle image from prompt, return url of image and error -func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) { +// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error +func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) { res, err := utils.Post( c.GetImageEndpoint(), c.GetHeader(), ImageRequest{ Model: props.Model, Prompt: props.Prompt, Size: utils.Multi[ImageSize]( - props.Model == globals.Dalle3, + props.Model == globals.Dalle3 || props.Model == globals.GPTImage1, ImageSize1024, ImageSize512, ), N: 1, }, props.Proxy) if err != nil || res == nil { - return "", fmt.Errorf(err.Error()) + return "", "", fmt.Errorf(err.Error()) } data := utils.MapToStruct[ImageResponse](res) if data == nil { - return "", fmt.Errorf("openai error: cannot parse response") + return "", "", fmt.Errorf("openai error: cannot parse response") } else if data.Error.Message != "" { - return "", fmt.Errorf(data.Error.Message) + return "", "", fmt.Errorf(data.Error.Message) } - return data.Data[0].Url, nil + // for gpt-image-1, return base64 data if available + if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" { + return "", data.Data[0].B64Json, nil + } + + return data.Data[0].Url, "", nil } // CreateImage will create a dalle image from prompt, return markdown of image func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) { - original, err := c.CreateImageRequest(ImageProps{ + url, b64Json, err := c.CreateImageRequest(ImageProps{ Model: props.Model, Prompt: c.GetLatestPrompt(props), Proxy: props.Proxy, @@ -61,6 +66,10 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro return "", err } - url := utils.StoreImage(original) - return utils.GetImageMarkdown(url), nil + if b64Json != "" { + return utils.GetBase64ImageMarkdown(b64Json), nil + } + + storedUrl := utils.StoreImage(url) + return utils.GetImageMarkdown(storedUrl), nil } diff --git a/adapter/openai/types.go b/adapter/openai/types.go index dfe2247..9a08d4e 100644 --- a/adapter/openai/types.go +++ b/adapter/openai/types.go @@ -16,12 +16,13 @@ type MessageContent struct { type MessageContents []MessageContent type Message struct { - Role string `json:"role"` - Content MessageContents `json:"content"` - Name *string `json:"name,omitempty"` - FunctionCall *globals.FunctionCall `json:"function_call,omitempty"` // only `function` role - ToolCallId *string `json:"tool_call_id,omitempty"` // only `tool` role - ToolCalls *globals.ToolCalls `json:"tool_calls,omitempty"` // only `assistant` role + Role string `json:"role"` + Content MessageContents `json:"content"` + Name *string `json:"name,omitempty"` + FunctionCall *globals.FunctionCall `json:"function_call,omitempty"` // only `function` role + ToolCallId *string `json:"tool_call_id,omitempty"` // only `tool` role + ToolCalls *globals.ToolCalls `json:"tool_calls,omitempty"` // only `assistant` role + ReasoningContent *string `json:"reasoning,omitempty"` // only for claude reasoning models } // ChatRequest is the request body for openai @@ -107,7 +108,8 @@ type ImageRequest struct { type ImageResponse struct { Data []struct { - Url string `json:"url"` + Url string `json:"url,omitempty"` + B64Json string `json:"b64_json,omitempty"` } `json:"data"` Error struct { Message string `json:"message"` diff --git a/globals/variables.go b/globals/variables.go index 9461c32..804c38e 100644 --- a/globals/variables.go +++ b/globals/variables.go @@ -88,6 +88,7 @@ const ( GPT432k0613 = "gpt-4-32k-0613" GPT4O = "gpt-4o" GPT4O20240513 = "gpt-4o-2024-05-13" + GPTImage1 = "gpt-image-1" Dalle = "dalle" Dalle2 = "dall-e-2" Dalle3 = "dall-e-3" @@ -147,7 +148,7 @@ const ( ) var OpenAIDalleModels = []string{ - Dalle, Dalle2, Dalle3, + Dalle, Dalle2, Dalle3, GPTImage1, } var GoogleImagenModels = []string{ diff --git a/manager/images.go b/manager/images.go index 3317a1f..7c07990 100644 --- a/manager/images.go +++ b/manager/images.go @@ -71,15 +71,20 @@ func getImageProps(form RelayImageForm, messages []globals.Message, buffer *util }, buffer) } -func getUrlFromBuffer(buffer *utils.Buffer) string { +func getImageDataFromBuffer(buffer *utils.Buffer) (string, string) { content := buffer.Read() urls := utils.ExtractImagesFromMarkdown(content) if len(urls) > 0 { - return urls[len(urls)-1] + return urls[len(urls)-1], "" } - return "" + base64Data := utils.ExtractBase64FromMarkdown(content) + if len(base64Data) > 0 { + return "", base64Data[len(base64Data)-1] + } + + return "", "" } func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string, created int64, user *auth.User, plan bool) { @@ -112,8 +117,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string, CollectQuota(c, user, buffer, plan, err) } - image := getUrlFromBuffer(buffer) - if image == "" { + url, b64Json := getImageDataFromBuffer(buffer) + if url == "" && b64Json == "" { sendErrorResponse(c, fmt.Errorf("no image generated"), "image_generation_error") return } @@ -122,7 +127,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string, Created: created, Data: []RelayImageData{ { - Url: image, + Url: url, + B64Json: b64Json, }, }, }) diff --git a/manager/types.go b/manager/types.go index 815c2fc..f00d89a 100644 --- a/manager/types.go +++ b/manager/types.go @@ -108,7 +108,8 @@ type RelayImageForm struct { } type RelayImageData struct { - Url string `json:"url"` + Url string `json:"url,omitempty"` + B64Json string `json:"b64_json,omitempty"` } type RelayImageResponse struct { diff --git a/utils/char.go b/utils/char.go index 9ccff39..afaacea 100644 --- a/utils/char.go +++ b/utils/char.go @@ -147,6 +147,24 @@ func GetImageMarkdown(url string) string { return fmt.Sprintf("![image](%s)", url) } +func GetBase64ImageMarkdown(b64 string, _desc ...string) string { + // Extracts the image type from base64 string (e.g., "data:image/png;base64,...") or defaults to png + var imageType = "png" + if strings.HasPrefix(b64, "data:image/") { + parts := strings.Split(b64[11:], ";") + if len(parts) > 0 { + imageType = parts[0] + } + } + + desc := "image" + if len(_desc) > 0 && _desc[0] != "" { + desc = _desc[0] + } + + return fmt.Sprintf("![%s](data:image/%s;base64,%s)", desc, imageType, b64) +} + // SplitItem is the split function for strings.Split // e.g. // SplitItem("a,b,c", ",") => ["a,", "b,", "c"] @@ -233,6 +251,21 @@ func ExtractImagesFromMarkdown(data string) (images []string) { return images } +func ExtractBase64FromMarkdown(data string) (images []string) { + // extract base64 images like `![image]()` + re := regexp.MustCompile(`!\[.*?\]\((data:image/\w+;base64,[\w+/=]+)\)`) + matches := re.FindAllStringSubmatch(data, -1) + + for _, match := range matches { + // We only need the base64 data part + if len(match) > 1 { + images = append(images, match[1]) + } + } + + return images +} + func ExtractBase64Images(data string) []string { // get base64 images from data () (\n \\n [space] \\t \\r \\v \\f break the base64 string) re := regexp.MustCompile(`(data:image/\w+;base64,[\w+/=]+)`)