mirror of
https://github.com/coaidev/coai.git
synced 2025-05-19 04:50:14 +09:00
feat: gpt-image-1
This commit is contained in:
parent
f787f3513a
commit
6ff57f1231
@ -20,36 +20,41 @@ func (c *ChatInstance) GetImageEndpoint(model string) string {
|
||||
return fmt.Sprintf("%s/openai/deployments/%s/images/generations?api-version=%s", c.GetResource(), model, c.GetEndpoint())
|
||||
}
|
||||
|
||||
// CreateImageRequest will create a dalle image from prompt, return url of image and error
|
||||
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) {
|
||||
// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error
|
||||
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) {
|
||||
res, err := utils.Post(
|
||||
c.GetImageEndpoint(props.Model),
|
||||
c.GetHeader(), ImageRequest{
|
||||
Prompt: props.Prompt,
|
||||
Size: utils.Multi[ImageSize](
|
||||
props.Model == globals.Dalle3,
|
||||
props.Model == globals.Dalle3 || props.Model == globals.GPTImage1,
|
||||
ImageSize1024,
|
||||
ImageSize512,
|
||||
),
|
||||
N: 1,
|
||||
}, props.Proxy)
|
||||
if err != nil || res == nil {
|
||||
return "", fmt.Errorf("openai error: %s", err.Error())
|
||||
return "", "", fmt.Errorf("openai error: %s", err.Error())
|
||||
}
|
||||
|
||||
data := utils.MapToStruct[ImageResponse](res)
|
||||
if data == nil {
|
||||
return "", fmt.Errorf("openai error: cannot parse response")
|
||||
return "", "", fmt.Errorf("openai error: cannot parse response")
|
||||
} else if data.Error.Message != "" {
|
||||
return "", fmt.Errorf("openai error: %s", data.Error.Message)
|
||||
return "", "", fmt.Errorf("openai error: %s", data.Error.Message)
|
||||
}
|
||||
|
||||
return data.Data[0].Url, nil
|
||||
// for gpt-image-1, return base64 data if available
|
||||
if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" {
|
||||
return "", data.Data[0].B64Json, nil
|
||||
}
|
||||
|
||||
return data.Data[0].Url, "", nil
|
||||
}
|
||||
|
||||
// CreateImage will create a dalle image from prompt, return markdown of image
|
||||
func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) {
|
||||
url, err := c.CreateImageRequest(ImageProps{
|
||||
url, b64Json, err := c.CreateImageRequest(ImageProps{
|
||||
Model: props.Model,
|
||||
Prompt: c.GetLatestPrompt(props),
|
||||
Proxy: props.Proxy,
|
||||
@ -61,5 +66,9 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro
|
||||
return "", err
|
||||
}
|
||||
|
||||
if b64Json != "" {
|
||||
return utils.GetBase64ImageMarkdown(b64Json), nil
|
||||
}
|
||||
|
||||
return utils.GetImageMarkdown(url), nil
|
||||
}
|
||||
|
@ -26,16 +26,17 @@ type Message struct {
|
||||
|
||||
// ChatRequest is the request body for openai
|
||||
type ChatRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages interface{} `json:"messages"`
|
||||
MaxToken *int `json:"max_tokens,omitempty"`
|
||||
Stream bool `json:"stream"`
|
||||
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
|
||||
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
|
||||
Temperature *float32 `json:"temperature,omitempty"`
|
||||
TopP *float32 `json:"top_p,omitempty"`
|
||||
Tools *globals.FunctionTools `json:"tools,omitempty"`
|
||||
ToolChoice *interface{} `json:"tool_choice,omitempty"` // string or object
|
||||
Model string `json:"model"`
|
||||
Messages interface{} `json:"messages"`
|
||||
MaxToken *int `json:"max_tokens,omitempty"`
|
||||
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
|
||||
Stream bool `json:"stream"`
|
||||
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
|
||||
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
|
||||
Temperature *float32 `json:"temperature,omitempty"`
|
||||
TopP *float32 `json:"top_p,omitempty"`
|
||||
Tools *globals.FunctionTools `json:"tools,omitempty"`
|
||||
ToolChoice *interface{} `json:"tool_choice,omitempty"` // string or object
|
||||
}
|
||||
|
||||
// CompletionRequest is the request body for openai completion
|
||||
@ -106,11 +107,21 @@ type ImageRequest struct {
|
||||
|
||||
type ImageResponse struct {
|
||||
Data []struct {
|
||||
Url string `json:"url"`
|
||||
Url string `json:"url,omitempty"`
|
||||
B64Json string `json:"b64_json,omitempty"`
|
||||
} `json:"data"`
|
||||
Error struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
Usage *struct {
|
||||
InputTokens int `json:"input_tokens"`
|
||||
InputTokensDetails struct {
|
||||
ImageTokens int `json:"image_tokens"`
|
||||
TextTokens int `json:"text_tokens"`
|
||||
} `json:"input_tokens_details"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
} `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -19,37 +19,42 @@ func (c *ChatInstance) GetImageEndpoint() string {
|
||||
return fmt.Sprintf("%s/v1/images/generations", c.GetEndpoint())
|
||||
}
|
||||
|
||||
// CreateImageRequest will create a dalle image from prompt, return url of image and error
|
||||
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, error) {
|
||||
// CreateImageRequest will create a dalle image from prompt, return url of image, base64 data and error
|
||||
func (c *ChatInstance) CreateImageRequest(props ImageProps) (string, string, error) {
|
||||
res, err := utils.Post(
|
||||
c.GetImageEndpoint(),
|
||||
c.GetHeader(), ImageRequest{
|
||||
Model: props.Model,
|
||||
Prompt: props.Prompt,
|
||||
Size: utils.Multi[ImageSize](
|
||||
props.Model == globals.Dalle3,
|
||||
props.Model == globals.Dalle3 || props.Model == globals.GPTImage1,
|
||||
ImageSize1024,
|
||||
ImageSize512,
|
||||
),
|
||||
N: 1,
|
||||
}, props.Proxy)
|
||||
if err != nil || res == nil {
|
||||
return "", fmt.Errorf(err.Error())
|
||||
return "", "", fmt.Errorf(err.Error())
|
||||
}
|
||||
|
||||
data := utils.MapToStruct[ImageResponse](res)
|
||||
if data == nil {
|
||||
return "", fmt.Errorf("openai error: cannot parse response")
|
||||
return "", "", fmt.Errorf("openai error: cannot parse response")
|
||||
} else if data.Error.Message != "" {
|
||||
return "", fmt.Errorf(data.Error.Message)
|
||||
return "", "", fmt.Errorf(data.Error.Message)
|
||||
}
|
||||
|
||||
return data.Data[0].Url, nil
|
||||
// for gpt-image-1, return base64 data if available
|
||||
if props.Model == globals.GPTImage1 && data.Data[0].B64Json != "" {
|
||||
return "", data.Data[0].B64Json, nil
|
||||
}
|
||||
|
||||
return data.Data[0].Url, "", nil
|
||||
}
|
||||
|
||||
// CreateImage will create a dalle image from prompt, return markdown of image
|
||||
func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, error) {
|
||||
original, err := c.CreateImageRequest(ImageProps{
|
||||
url, b64Json, err := c.CreateImageRequest(ImageProps{
|
||||
Model: props.Model,
|
||||
Prompt: c.GetLatestPrompt(props),
|
||||
Proxy: props.Proxy,
|
||||
@ -61,6 +66,10 @@ func (c *ChatInstance) CreateImage(props *adaptercommon.ChatProps) (string, erro
|
||||
return "", err
|
||||
}
|
||||
|
||||
url := utils.StoreImage(original)
|
||||
return utils.GetImageMarkdown(url), nil
|
||||
if b64Json != "" {
|
||||
return utils.GetBase64ImageMarkdown(b64Json), nil
|
||||
}
|
||||
|
||||
storedUrl := utils.StoreImage(url)
|
||||
return utils.GetImageMarkdown(storedUrl), nil
|
||||
}
|
||||
|
@ -16,12 +16,13 @@ type MessageContent struct {
|
||||
type MessageContents []MessageContent
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content MessageContents `json:"content"`
|
||||
Name *string `json:"name,omitempty"`
|
||||
FunctionCall *globals.FunctionCall `json:"function_call,omitempty"` // only `function` role
|
||||
ToolCallId *string `json:"tool_call_id,omitempty"` // only `tool` role
|
||||
ToolCalls *globals.ToolCalls `json:"tool_calls,omitempty"` // only `assistant` role
|
||||
Role string `json:"role"`
|
||||
Content MessageContents `json:"content"`
|
||||
Name *string `json:"name,omitempty"`
|
||||
FunctionCall *globals.FunctionCall `json:"function_call,omitempty"` // only `function` role
|
||||
ToolCallId *string `json:"tool_call_id,omitempty"` // only `tool` role
|
||||
ToolCalls *globals.ToolCalls `json:"tool_calls,omitempty"` // only `assistant` role
|
||||
ReasoningContent *string `json:"reasoning,omitempty"` // only for claude reasoning models
|
||||
}
|
||||
|
||||
// ChatRequest is the request body for openai
|
||||
@ -107,7 +108,8 @@ type ImageRequest struct {
|
||||
|
||||
type ImageResponse struct {
|
||||
Data []struct {
|
||||
Url string `json:"url"`
|
||||
Url string `json:"url,omitempty"`
|
||||
B64Json string `json:"b64_json,omitempty"`
|
||||
} `json:"data"`
|
||||
Error struct {
|
||||
Message string `json:"message"`
|
||||
|
@ -88,6 +88,7 @@ const (
|
||||
GPT432k0613 = "gpt-4-32k-0613"
|
||||
GPT4O = "gpt-4o"
|
||||
GPT4O20240513 = "gpt-4o-2024-05-13"
|
||||
GPTImage1 = "gpt-image-1"
|
||||
Dalle = "dalle"
|
||||
Dalle2 = "dall-e-2"
|
||||
Dalle3 = "dall-e-3"
|
||||
@ -147,7 +148,7 @@ const (
|
||||
)
|
||||
|
||||
var OpenAIDalleModels = []string{
|
||||
Dalle, Dalle2, Dalle3,
|
||||
Dalle, Dalle2, Dalle3, GPTImage1,
|
||||
}
|
||||
|
||||
var GoogleImagenModels = []string{
|
||||
|
@ -71,15 +71,20 @@ func getImageProps(form RelayImageForm, messages []globals.Message, buffer *util
|
||||
}, buffer)
|
||||
}
|
||||
|
||||
func getUrlFromBuffer(buffer *utils.Buffer) string {
|
||||
func getImageDataFromBuffer(buffer *utils.Buffer) (string, string) {
|
||||
content := buffer.Read()
|
||||
|
||||
urls := utils.ExtractImagesFromMarkdown(content)
|
||||
if len(urls) > 0 {
|
||||
return urls[len(urls)-1]
|
||||
return urls[len(urls)-1], ""
|
||||
}
|
||||
|
||||
return ""
|
||||
base64Data := utils.ExtractBase64FromMarkdown(content)
|
||||
if len(base64Data) > 0 {
|
||||
return "", base64Data[len(base64Data)-1]
|
||||
}
|
||||
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string, created int64, user *auth.User, plan bool) {
|
||||
@ -112,8 +117,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string,
|
||||
CollectQuota(c, user, buffer, plan, err)
|
||||
}
|
||||
|
||||
image := getUrlFromBuffer(buffer)
|
||||
if image == "" {
|
||||
url, b64Json := getImageDataFromBuffer(buffer)
|
||||
if url == "" && b64Json == "" {
|
||||
sendErrorResponse(c, fmt.Errorf("no image generated"), "image_generation_error")
|
||||
return
|
||||
}
|
||||
@ -122,7 +127,8 @@ func createRelayImageObject(c *gin.Context, form RelayImageForm, prompt string,
|
||||
Created: created,
|
||||
Data: []RelayImageData{
|
||||
{
|
||||
Url: image,
|
||||
Url: url,
|
||||
B64Json: b64Json,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
@ -108,7 +108,8 @@ type RelayImageForm struct {
|
||||
}
|
||||
|
||||
type RelayImageData struct {
|
||||
Url string `json:"url"`
|
||||
Url string `json:"url,omitempty"`
|
||||
B64Json string `json:"b64_json,omitempty"`
|
||||
}
|
||||
|
||||
type RelayImageResponse struct {
|
||||
|
@ -147,6 +147,24 @@ func GetImageMarkdown(url string) string {
|
||||
return fmt.Sprintf("", url)
|
||||
}
|
||||
|
||||
// GetBase64ImageMarkdown renders a base64-encoded image as an inline markdown
// image of the form ![desc](data:image/<type>;base64,<payload>).
//
// b64 may be either a bare base64 payload or a complete data URI such as
// "data:image/jpeg;base64,....". For a data URI the image type is taken from
// its header and the header is stripped, so it is not emitted twice; a bare
// payload is assumed to be png.
//
// The optional first element of _desc is used as the alt text; it defaults to
// "image" when omitted or empty.
func GetBase64ImageMarkdown(b64 string, _desc ...string) string {
	imageType := "png"
	payload := b64

	if strings.HasPrefix(b64, "data:image/") {
		// header is "data:image/<type>[;base64]", body is everything after the
		// first comma.
		header, body, hasBody := strings.Cut(b64, ",")
		mediaType, _, _ := strings.Cut(strings.TrimPrefix(header, "data:image/"), ";")
		if mediaType != "" {
			imageType = mediaType
		}
		if hasBody {
			payload = body
		}
	}

	desc := "image"
	if len(_desc) > 0 && _desc[0] != "" {
		desc = _desc[0]
	}

	return fmt.Sprintf("![%s](data:image/%s;base64,%s)", desc, imageType, payload)
}
|
||||
|
||||
// SplitItem is the split function for strings.Split
|
||||
// e.g.
|
||||
// SplitItem("a,b,c", ",") => ["a,", "b,", "c"]
|
||||
@ -233,6 +251,21 @@ func ExtractImagesFromMarkdown(data string) (images []string) {
|
||||
return images
|
||||
}
|
||||
|
||||
// base64MarkdownImagePattern matches a markdown image whose target is a base64
// data URI and captures that URI. It is compiled once at package scope instead
// of on every call.
var base64MarkdownImagePattern = regexp.MustCompile(`!\[.*?\]\((data:image/\w+;base64,[\w+/=]+)\)`)

// ExtractBase64FromMarkdown returns, in order of appearance, the data URI of
// every markdown image in data that embeds base64 content, e.g. the
// "data:image/png;base64,xxxxxx" part of ![image](data:image/png;base64,xxxxxx).
// Images that point at ordinary urls are ignored. The result is nil when
// nothing matches.
func ExtractBase64FromMarkdown(data string) (images []string) {
	matches := base64MarkdownImagePattern.FindAllStringSubmatch(data, -1)
	if len(matches) == 0 {
		return nil
	}

	images = make([]string, 0, len(matches))
	for _, match := range matches {
		// match[0] is the whole markdown image, match[1] the captured data URI.
		images = append(images, match[1])
	}

	return images
}
|
||||
|
||||
func ExtractBase64Images(data string) []string {
|
||||
// get base64 images from data (data:image/png;base64,xxxxxx) (\n \\n [space] \\t \\r \\v \\f break the base64 string)
|
||||
re := regexp.MustCompile(`(data:image/\w+;base64,[\w+/=]+)`)
|
||||
|
Loading…
Reference in New Issue
Block a user