From 16a74423beca1c952cb14a97347642631988d6e7 Mon Sep 17 00:00:00 2001 From: Zhang Minghan Date: Sat, 11 Nov 2023 09:33:23 +0800 Subject: [PATCH] add stable diffusion, llama models and add dashboard feature --- README.md | 10 ++++- adapter/oneapi/globals.go | 5 +++ app/src/conf.ts | 21 ++++++++-- auth/rule.go | 5 +++ globals/variables.go | 83 +++++++++++++++++---------------------- manager/router.go | 2 + manager/usage.go | 60 ++++++++++++++++++++++++++++ middleware/throttle.go | 8 ++-- utils/tokenizer.go | 14 +++++++ 9 files changed, 151 insertions(+), 57 deletions(-) create mode 100644 manager/usage.go diff --git a/README.md b/README.md index dd69dcc..460e43f 100644 --- a/README.md +++ b/README.md @@ -58,16 +58,19 @@ ## 🔨 模型 | Models - ChatGPT - - GPT-3.5-Turbo (_0613_, _0301_) + - GPT-3.5-Turbo (_0613_, _0301_, _1106_) - GPT-3.5-Turbo-16k (_0613_, _0301_) - GPT-3.5-Reverse (_text-davincci-002-render-sha_, _text-davincci-002-render-paid_) - GPT-3.5-Turbo-Instruct - GPT-4 (_0314_, _0613_) + - GPT-4 1106 Preview - GPT-4-32k (_0314_, _0613_) - GPT-4-Reverse (_gpt-4_, _**gpt-4v**_, _**dalle3**_) - DALL-E 2 + - DALL-E 3 - Claude - Slack-Claude (unstable) + - Claude-instant - Claude-2 - Claude-2-100k - SparkDesk 讯飞星火 @@ -84,12 +87,17 @@ - Balanced - Precise - ChatGLM + - Turbo - Pro - Std - Lite - DashScope Tongyi - Qwen Plus (net) - Qwen Turbo (net) +- Midjourney +- Stable Diffusion +- LLaMa 2 (70b, 13b, 7b) +- Code LLaMa (34b, 13b, 7b) - More models are under development... 
diff --git a/adapter/oneapi/globals.go b/adapter/oneapi/globals.go index 04f49e9..d1e0804 100644 --- a/adapter/oneapi/globals.go +++ b/adapter/oneapi/globals.go @@ -7,6 +7,9 @@ import ( var HitModels = []string{ globals.Claude1, globals.Claude1100k, globals.Claude2, globals.Claude2100k, + globals.StableDiffusion, + globals.LLaMa270B, globals.LLaMa213B, globals.LLaMa27B, + globals.CodeLLaMa34B, globals.CodeLLaMa13B, globals.CodeLLaMa7B, } func (c *ChatInstance) Process(data string) string { @@ -27,6 +30,8 @@ func (c *ChatInstance) GetToken(model string) int { return 5000 case globals.Claude2100k, globals.Claude1100k: return 50000 + case globals.LLaMa270B, globals.LLaMa213B, globals.LLaMa27B, globals.CodeLLaMa34B, globals.CodeLLaMa13B, globals.CodeLLaMa7B: + return 3000 default: return 2500 } diff --git a/app/src/conf.ts b/app/src/conf.ts index 83d60bd..3a8f37d 100644 --- a/app/src/conf.ts +++ b/app/src/conf.ts @@ -8,7 +8,7 @@ import { } from "@/utils/env.ts"; import { getMemory } from "@/utils/memory.ts"; -export const version = "3.6.17"; +export const version = "3.6.18"; export const dev: boolean = getDev(); export const deploy: boolean = true; export let rest_api: string = getRestApi(deploy); @@ -39,15 +39,28 @@ export const supportModels: Model[] = [ { id: "qwen-turbo-net", name: "้€šไน‰ๅƒ้—ฎ Turbo X", free: false, auth: true }, { id: "qwen-turbo", name: "้€šไน‰ๅƒ้—ฎ Turbo", free: false, auth: true }, + // zhipu models + { id: "zhipu-chatglm-turbo", name: "ChatGLM Turbo 32k", free: false, auth: true }, + + // llama models + { id: "llama-2-70b", name: "LLaMa-2 70B", free: false, auth: true }, + { id: "llama-2-13b", name: "LLaMa-2 13B", free: false, auth: true }, + { id: "llama-2-7b", name: "LLaMa-2 7B", free: false, auth: true }, + + { id: "code-llama-34b", name: "Code LLaMa 34B", free: false, auth: true }, + { id: "code-llama-13b", name: "Code LLaMa 13B", free: false, auth: true }, + { id: "code-llama-7b", name: "Code LLaMa 7B", free: false, auth: true }, + + 
// drawing models + { id: "stable-diffusion", name: "Stable Diffusion XL", free: false, auth: true }, + // new bing { id: "bing-creative", name: "New Bing", free: true, auth: true }, // google palm2 { id: "chat-bison-001", name: "Palm2", free: true, auth: true }, - // zhipu models - { id: "zhipu-chatglm-turbo", name: "ChatGLM Turbo 32k", free: false, auth: true }, - + // dalle models { id: "dalle", name: "DALLE2", free: true, auth: true }, // reverse models diff --git a/auth/rule.go b/auth/rule.go index 40dee8a..9292a3d 100644 --- a/auth/rule.go +++ b/auth/rule.go @@ -23,6 +23,11 @@ func CanEnableModel(db *sql.DB, user *User, model string) bool { return user != nil && user.GetQuota(db) >= 1 case globals.QwenTurbo, globals.QwenPlus, globals.QwenPlusNet, globals.QwenTurboNet: return user != nil && user.GetQuota(db) >= 1 + case globals.Midjourney, globals.StableDiffusion: + return user != nil && user.GetQuota(db) >= 1 + case globals.LLaMa27B, globals.LLaMa213B, globals.LLaMa270B, + globals.CodeLLaMa34B, globals.CodeLLaMa13B, globals.CodeLLaMa7B: + return user != nil && user.GetQuota(db) >= 1 default: return user != nil } diff --git a/globals/variables.go b/globals/variables.go index 602a57a..fe66d68 100644 --- a/globals/variables.go +++ b/globals/variables.go @@ -34,7 +34,7 @@ func OriginIsAllowed(uri string) bool { } func OriginIsOpen(c *gin.Context) bool { - return strings.HasPrefix(c.Request.URL.Path, "/v1") + return strings.HasPrefix(c.Request.URL.Path, "/v1") || strings.HasPrefix(c.Request.URL.Path, "/dashboard") } const ( @@ -77,6 +77,14 @@ const ( QwenPlus = "qwen-plus" QwenTurboNet = "qwen-turbo-net" QwenPlusNet = "qwen-plus-net" + Midjourney = "midjourney" + StableDiffusion = "stable-diffusion" + LLaMa270B = "llama-2-70b" + LLaMa213B = "llama-2-13b" + LLaMa27B = "llama-2-7b" + CodeLLaMa34B = "code-llama-34b" + CodeLLaMa13B = "code-llama-13b" + CodeLLaMa7B = "code-llama-7b" ) var GPT3TurboArray = []string{ @@ -109,6 +117,11 @@ var ClaudeModelArray = []string{ 
Claude2, Claude2100k, } +var LLaMaModelArray = []string{ + LLaMa270B, LLaMa213B, LLaMa27B, + CodeLLaMa34B, CodeLLaMa13B, CodeLLaMa7B, +} + var BingModelArray = []string{ BingCreative, BingBalanced, @@ -136,16 +149,11 @@ var QwenModelArray = []string{ } var LongContextModelArray = []string{ - GPT3Turbo16k, - GPT3Turbo16k0613, - GPT3Turbo16k0301, - GPT432k, - GPT432k0314, - GPT432k0613, - Claude1, - Claude1100k, - Claude2, - Claude2100k, + GPT3Turbo16k, GPT3Turbo16k0613, GPT3Turbo16k0301, + GPT41106Preview, GPT432k, GPT432k0314, GPT432k0613, + Claude1, Claude1100k, + CodeLLaMa34B, LLaMa270B, + Claude2, Claude2100k, } var FreeModelArray = []string{ @@ -167,45 +175,20 @@ var FreeModelArray = []string{ } var AllModels = []string{ - GPT3Turbo, - GPT3TurboInstruct, - GPT3Turbo0613, - GPT3Turbo0301, - GPT3Turbo1106, - GPT3Turbo16k, - GPT3Turbo16k0613, - GPT3Turbo16k0301, - GPT4, - GPT40314, - GPT40613, - GPT4Vision, - GPT4All, - GPT41106Preview, - GPT4Dalle, - GPT432k, - GPT432k0314, - GPT432k0613, + GPT3Turbo, GPT3TurboInstruct, GPT3Turbo0613, GPT3Turbo0301, GPT3Turbo1106, + GPT3Turbo16k, GPT3Turbo16k0613, GPT3Turbo16k0301, + GPT4, GPT40314, GPT40613, GPT4Vision, GPT4All, GPT41106Preview, GPT4Dalle, + GPT432k, GPT432k0314, GPT432k0613, Dalle2, - Claude1, - Claude1100k, - Claude2, - Claude2100k, - ClaudeSlack, - SparkDesk, - SparkDeskV2, - SparkDeskV3, + Claude1, Claude1100k, Claude2, Claude2100k, ClaudeSlack, + SparkDesk, SparkDeskV2, SparkDeskV3, ChatBison001, - BingCreative, - BingBalanced, - BingPrecise, - ZhiPuChatGLMTurbo, - ZhiPuChatGLMPro, - ZhiPuChatGLMStd, - ZhiPuChatGLMLite, - QwenTurbo, - QwenPlus, - QwenTurboNet, - QwenPlusNet, + BingCreative, BingBalanced, BingPrecise, + ZhiPuChatGLMTurbo, ZhiPuChatGLMPro, ZhiPuChatGLMStd, ZhiPuChatGLMLite, + QwenTurbo, QwenPlus, QwenTurboNet, QwenPlusNet, + Midjourney, StableDiffusion, + LLaMa270B, LLaMa213B, LLaMa27B, + CodeLLaMa34B, CodeLLaMa13B, CodeLLaMa7B, } func in(value string, slice []string) bool { @@ -237,6 +220,10 
@@ func IsClaudeModel(model string) bool { return in(model, ClaudeModelArray) } +func IsLLaMaModel(model string) bool { + return in(model, LLaMaModelArray) +} + func IsClaude100KModel(model string) bool { return model == Claude1100k || model == Claude2100k } diff --git a/manager/router.go b/manager/router.go index 9436ad6..66cad3a 100644 --- a/manager/router.go +++ b/manager/router.go @@ -5,5 +5,7 @@ import "github.com/gin-gonic/gin" func Register(app *gin.Engine) { app.GET("/chat", ChatAPI) app.GET("/v1/models", ModelAPI) + app.GET("/dashboard/billing/usage", GetBillingUsage) + app.GET("/dashboard/billing/subscription", GetSubscription) app.POST("/v1/chat/completions", TranshipmentAPI) } diff --git a/manager/usage.go b/manager/usage.go new file mode 100644 index 0000000..fc7c7f3 --- /dev/null +++ b/manager/usage.go @@ -0,0 +1,60 @@ +package manager + +import ( + "chat/auth" + "chat/utils" + "github.com/gin-gonic/gin" + "net/http" +) + +type BillingResponse struct { + Object string `json:"object"` + TotalUsage float32 `json:"total_usage"` +} + +type SubscriptionResponse struct { + Object string `json:"object"` + SoftLimit int64 `json:"soft_limit"` + HardLimit int64 `json:"hard_limit"` + SystemHardLimit int64 `json:"system_hard_limit"` + SoftLimitUSD float32 `json:"soft_limit_usd"` + HardLimitUSD float32 `json:"hard_limit_usd"` + SystemHardLimitUSD float32 `json:"system_hard_limit_usd"` +} + +func GetBillingUsage(c *gin.Context) { + user := auth.RequireAuth(c) + if user == nil { + return + } + + db := utils.GetDBFromContext(c) + usage := user.GetUsedQuota(db) + + c.JSON(http.StatusOK, BillingResponse{ + Object: "list", + TotalUsage: usage, + }) +} + +func GetSubscription(c *gin.Context) { + user := auth.RequireAuth(c) + if user == nil { + return + } + + db := utils.GetDBFromContext(c) + quota := user.GetQuota(db) + used := user.GetUsedQuota(db) + total := quota + used + + c.JSON(http.StatusOK, SubscriptionResponse{ + Object: "billing_subscription", + SoftLimit: 
int64(quota * 100), + HardLimit: int64(total * 100), + SystemHardLimit: 100000000, + SoftLimitUSD: quota / 7.3, + HardLimitUSD: total / 7.3, + SystemHardLimitUSD: 1000000, + }) +} diff --git a/middleware/throttle.go b/middleware/throttle.go index ba98bbf..3f6b1fb 100644 --- a/middleware/throttle.go +++ b/middleware/throttle.go @@ -39,10 +39,10 @@ var limits = map[string]Limiter{ "/conversation": {Duration: 1, Count: 5}, "/invite": {Duration: 7200, Count: 20}, "/v1": {Duration: 1, Count: 600}, - - "/card": {Duration: 1, Count: 5}, - "/generation": {Duration: 1, Count: 5}, - "/article": {Duration: 1, Count: 5}, + "/dashboard": {Duration: 1, Count: 5}, + "/card": {Duration: 1, Count: 5}, + "/generation": {Duration: 1, Count: 5}, + "/article": {Duration: 1, Count: 5}, } func GetPrefixMap[T comparable](s string, p map[string]T) *T { diff --git a/utils/tokenizer.go b/utils/tokenizer.go index 50abe49..bb5c7ed 100644 --- a/utils/tokenizer.go +++ b/utils/tokenizer.go @@ -20,6 +20,8 @@ func GetWeightByModel(model string) int { globals.GPT3Turbo16k, globals.GPT3Turbo16k0613, globals.GPT4, globals.GPT4Vision, globals.GPT4Dalle, globals.GPT4All, globals.GPT40314, globals.GPT40613, globals.GPT41106Preview, globals.GPT432k, globals.GPT432k0613, globals.GPT432k0314, + globals.LLaMa27B, globals.LLaMa213B, globals.LLaMa270B, + globals.CodeLLaMa34B, globals.CodeLLaMa13B, globals.CodeLLaMa7B, globals.SparkDesk, globals.SparkDeskV2, globals.SparkDeskV3, globals.QwenTurbo, globals.QwenPlus, globals.QwenTurboNet, globals.QwenPlusNet, @@ -91,6 +93,10 @@ func CountInputToken(model string, v []globals.Message) float32 { return 0 case globals.Claude1100k, globals.Claude2100k: return float32(CountTokenPrice(v, model)) / 1000 * 0.8 * 0.6 + case globals.LLaMa270B, globals.CodeLLaMa34B: + return float32(CountTokenPrice(v, model)) / 1000 * 0.25 + case globals.LLaMa213B, globals.CodeLLaMa13B, globals.LLaMa27B, globals.CodeLLaMa7B: + return float32(CountTokenPrice(v, model)) / 1000 * 0.1 case 
globals.ZhiPuChatGLMPro: return float32(CountTokenPrice(v, model)) / 1000 * 0.1 case globals.ZhiPuChatGLMTurbo, globals.ZhiPuChatGLMStd: @@ -123,6 +129,10 @@ func CountOutputToken(model string, t int) float32 { return 0 case globals.Claude1100k, globals.Claude2100k: return float32(t*GetWeightByModel(model)) / 1000 * 2.4 * 0.6 + case globals.LLaMa270B, globals.CodeLLaMa34B: + return float32(t*GetWeightByModel(model)) / 1000 * 0.25 + case globals.LLaMa213B, globals.CodeLLaMa13B, globals.LLaMa27B, globals.CodeLLaMa7B: + return float32(t*GetWeightByModel(model)) / 1000 * 0.1 case globals.ZhiPuChatGLMPro: return float32(t*GetWeightByModel(model)) / 1000 * 0.1 case globals.ZhiPuChatGLMTurbo, globals.ZhiPuChatGLMStd: @@ -131,6 +141,10 @@ func CountOutputToken(model string, t int) float32 { return float32(t*GetWeightByModel(model)) / 1000 * 0.08 case globals.QwenPlus, globals.QwenPlusNet: return float32(t*GetWeightByModel(model)) / 1000 * 0.2 + case globals.StableDiffusion: + return 0.25 + case globals.Midjourney: + return 0.5 default: return 0 }