feat: gpt-image-1

This commit is contained in:
Sh1n3zZ 2025-04-29 21:08:28 +08:00
parent f787f3513a
commit 6ff57f1231
No known key found for this signature in database
GPG Key ID: 696702CF723B0452
8 changed files with 116 additions and 44 deletions

View File

@ -20,36 +20,41 @@ func (c *ChatInstance) GetImageEndpoint(model string) string {
return fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", c.GetResource(), model, c.GetEndpoint())
}
// CreateImageRequest will create a dalle image from prompt, return url of image and error
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) {
// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) {
res, err := utils.Post(
c.GetImageEndpoint(props.Model),
c.GetHeader(), ImageRequest{
Prompt: props.Prompt,
Size: utils.Multi[ImageSize](
props.Model == globals.Dalle3,
props.Model == globals.Dalle3 || props.Model == globals.GPTImage1,
ImageSize1024,
ImageSize512,
),
N: 1,
}, props.Proxy)
if err != nil || res == nil {
return "", fmt.Errorf("openai error: %s", err.Error())
return "", "", fmt.Errorf("openai error: %s", err.Error())
}
data := utils.MapToStruct[ImageResponse](res)
if data == nil {
return "", fmt.Errorf("openai error: cannot parse response")
return "", "", fmt.Errorf("openai error: cannot parse response")
} else if data.Error.Message != "" {
return "", fmt.Errorf("openai error: %s", data.Error.Message)
return "", "", fmt.Errorf("openai error: %s", data.Error.Message)
}
return data.Data[0].Url, nil
// for gpt-image-1, return base64 data if available
if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" {
return "", data.Data[0].B64Json, nil
}
return data.Data[0].Url, "", nil
}
// CreateImage will create a dalle image from prompt, return markdown of image
func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) {
url, err := c.CreateImageRequest(ImageProps{
url, b64Json, err := c.CreateImageRequest(ImageProps{
Model: props.Model,
Prompt: c.GetLatestPrompt(props),
Proxy: props.Proxy,
@ -61,5 +66,9 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro
return "", err
}
if b64Json != "" {
return utils.GetBase64ImageMarkdown(b64Json), nil
}
return utils.GetImageMarkdown(url), nil
}

View File

@ -29,6 +29,7 @@ type ChatRequest struct {
Model string `json:"model"`
Messages interface{} `json:"messages"`
MaxToken *int `json:"max_tokens,omitempty"`
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
Stream bool `json:"stream"`
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
@ -106,11 +107,21 @@ type ImageRequest struct {
type ImageResponse struct {
Data []struct {
Url string `json:"url"`
Url string `json:"url,omitempty"`
B64Json string `json:"b64_json,omitempty"`
} `json:"data"`
Error struct {
Message string `json:"message"`
} `json:"error"`
Usage *struct {
InputTokens int `json:"input_tokens"`
InputTokensDetails struct {
ImageTokens int `json:"image_tokens"`
TextTokens int `json:"text_tokens"`
} `json:"input_tokens_details"`
OutputTokens int `json:"output_tokens"`
TotalTokens int `json:"total_tokens"`
} `json:"usage,omitempty"`
}
var (

View File

@ -19,37 +19,42 @@ func (c *ChatInstance) GetImageEndpoint() string {
return fmt.Sprintf("%s/v1/images/generations", c.GetEndpoint())
}
// CreateImageRequest will create a dalle image from prompt, return url of image and error
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) {
// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) {
res, err := utils.Post(
c.GetImageEndpoint(),
c.GetHeader(), ImageRequest{
Model: props.Model,
Prompt: props.Prompt,
Size: utils.Multi[ImageSize](
props.Model == globals.Dalle3,
props.Model == globals.Dalle3 || props.Model == globals.GPTImage1,
ImageSize1024,
ImageSize512,
),
N: 1,
}, props.Proxy)
if err != nil || res == nil {
return "", fmt.Errorf(err.Error())
return "", "", fmt.Errorf(err.Error())
}
data := utils.MapToStruct[ImageResponse](res)
if data == nil {
return "", fmt.Errorf("openai error: cannot parse response")
return "", "", fmt.Errorf("openai error: cannot parse response")
} else if data.Error.Message != "" {
return "", fmt.Errorf(data.Error.Message)
return "", "", fmt.Errorf(data.Error.Message)
}
return data.Data[0].Url, nil
// for gpt-image-1, return base64 data if available
if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" {
return "", data.Data[0].B64Json, nil
}
return data.Data[0].Url, "", nil
}
// CreateImage will create a dalle image from prompt, return markdown of image
func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) {
original, err := c.CreateImageRequest(ImageProps{
url, b64Json, err := c.CreateImageRequest(ImageProps{
Model: props.Model,
Prompt: c.GetLatestPrompt(props),
Proxy: props.Proxy,
@ -61,6 +66,10 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro
return "", err
}
url := utils.StoreImage(original)
return utils.GetImageMarkdown(url), nil
if b64Json != "" {
return utils.GetBase64ImageMarkdown(b64Json), nil
}
storedUrl := utils.StoreImage(url)
return utils.GetImageMarkdown(storedUrl), nil
}

View File

@ -22,6 +22,7 @@ type Message struct {
FunctionCall *globals.FunctionCall `json:"function_call,omitempty"` // only `function` role
ToolCallId *string `json:"tool_call_id,omitempty"` // only `tool` role
ToolCalls *globals.ToolCalls `json:"tool_calls,omitempty"` // only `assistant` role
ReasoningContent *string `json:"reasoning,omitempty"` // only for claude reasoning models
}
// ChatRequest is the request body for openai
@ -107,7 +108,8 @@ type ImageRequest struct {
type ImageResponse struct {
Data []struct {
Url string `json:"url"`
Url string `json:"url,omitempty"`
B64Json string `json:"b64_json,omitempty"`
} `json:"data"`
Error struct {
Message string `json:"message"`

View File

@ -88,6 +88,7 @@ const (
GPT432k0613 = "gpt-4-32k-0613"
GPT4O = "gpt-4o"
GPT4O20240513 = "gpt-4o-2024-05-13"
GPTImage1 = "gpt-image-1"
Dalle = "dalle"
Dalle2 = "dall-e-2"
Dalle3 = "dall-e-3"
@ -147,7 +148,7 @@ const (
)
var OpenAIDalleModels = []string{
Dalle, Dalle2, Dalle3,
Dalle, Dalle2, Dalle3, GPTImage1,
}
var GoogleImagenModels = []string{

View File

@ -71,15 +71,20 @@ func getImageProps(form RelayImageForm, messages []globals.Message, buffer *util
}, buffer)
}
func getUrlFromBuffer(buffer *utils.Buffer) string {
func getImageDataFromBuffer(buffer *utils.Buffer) (string, string) {
content := buffer.Read()
urls := utils.ExtractImagesFromMarkdown(content)
if len(urls) > 0 {
return urls[len(urls)-1]
return urls[len(urls)-1], ""
}
return ""
base64Data := utils.ExtractBase64FromMarkdown(content)
if len(base64Data) > 0 {
return "", base64Data[len(base64Data)-1]
}
return "", ""
}
func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string, created int64, user *auth.User, plan bool) {
@ -112,8 +117,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string,
CollectQuota(c, user, buffer, plan, err)
}
image := getUrlFromBuffer(buffer)
if image == "" {
url, b64Json := getImageDataFromBuffer(buffer)
if url == "" && b64Json == "" {
sendErrorResponse(c, fmt.Errorf("no image generated"), "image_generation_error")
return
}
@ -122,7 +127,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string,
Created: created,
Data: []RelayImageData{
{
Url: image,
Url: url,
B64Json: b64Json,
},
},
})

View File

@ -108,7 +108,8 @@ type RelayImageForm struct {
}
type RelayImageData struct {
Url string `json:"url"`
Url string `json:"url,omitempty"`
B64Json string `json:"b64_json,omitempty"`
}
type RelayImageResponse struct {

View File

@ -147,6 +147,24 @@ func GetImageMarkdown(url string) string {
return fmt.Sprintf("![image](%s)", url)
}
// GetBase64ImageMarkdown renders base64-encoded image data as a markdown image
// whose target is a data URI.
//
// b64 may be either a bare base64 payload or a complete data URI of the form
// "data:image/<type>;base64,<payload>". For a complete URI the image type is
// taken from the URI and the header is stripped before formatting, so the
// result never contains the "data:image/...;base64," header twice. A bare
// payload is emitted as PNG.
//
// The first element of _desc, when present and non-empty, is used as the alt
// text; otherwise the alt text is "image".
func GetBase64ImageMarkdown(b64 string, _desc ...string) string {
	const (
		uriPrefix    = "data:image/"
		base64Marker = ";base64,"
	)

	imageType, payload := "png", b64
	if strings.HasPrefix(b64, uriPrefix) {
		// Split "<type>;base64,<payload>" into its two halves. Input that has
		// the prefix but no base64 marker is left untouched and treated as a
		// bare payload.
		if kind, body, ok := strings.Cut(b64[len(uriPrefix):], base64Marker); ok {
			payload = body
			if kind != "" {
				imageType = kind
			}
		}
	}

	desc := "image"
	if len(_desc) > 0 && _desc[0] != "" {
		desc = _desc[0]
	}

	return fmt.Sprintf("![%s](data:image/%s;base64,%s)", desc, imageType, payload)
}
// SplitItem is the split function for strings.Split
// e.g.
// SplitItem("a,b,c", ",") => ["a,", "b,", "c"]
@ -233,6 +251,21 @@ func ExtractImagesFromMarkdown(data string) (images []string) {
return images
}
// base64MarkdownImagePattern matches a markdown image whose target is a base64
// data URI and captures that URI. It is compiled once at package scope so that
// ExtractBase64FromMarkdown does not recompile it on every call.
var base64MarkdownImagePattern = regexp.MustCompile(`!\[.*?\]\((data:image/\w+;base64,[\w+/=]+)\)`)

// ExtractBase64FromMarkdown returns, in order of appearance, the data URI of
// every markdown image in data written as
// `![alt](data:image/<type>;base64,<payload>)`.
//
// Each returned element is the complete data URI (header included), not just
// the base64 payload. The result is nil when data contains no such image.
func ExtractBase64FromMarkdown(data string) (images []string) {
	for _, match := range base64MarkdownImagePattern.FindAllStringSubmatch(data, -1) {
		// match[0] is the whole markdown image; match[1] is the captured data
		// URI. The pattern has exactly one group, so match[1] always exists.
		images = append(images, match[1])
	}

	return images
}
func ExtractBase64Images(data string) []string {
// get base64 images from data () (\n \\n [space] \\t \\r \\v \\f break the base64 string)
re := regexp.MustCompile(`(data:image/\w+;base64,[\w+/=]+)`)