Hk-Gosuto 2024-03-24 11:42:06 +08:00
parent 428bf81801
commit a18cb2c525
12 changed files with 112 additions and 18 deletions

View File

@@ -67,9 +67,16 @@ export async function requestOpenai(req: NextRequest) {
let jsonBody;
let clonedBody;
if (req.method !== "GET" && req.method !== "HEAD") {
const contentType = req.headers.get("Content-Type");
if (
req.method !== "GET" &&
req.method !== "HEAD" &&
contentType?.includes("json")
) {
clonedBody = await req.text();
jsonBody = JSON.parse(clonedBody) as { model?: string };
} else {
clonedBody = req.body;
}
if (serverConfig.isAzure) {
baseUrl = `${baseUrl}/${jsonBody?.model}`;
@@ -77,7 +84,7 @@ export async function requestOpenai(req: NextRequest) {
const fetchUrl = `${baseUrl}/${path}`;
const fetchOptions: RequestInit = {
headers: {
"Content-Type": "application/json",
"Content-Type": contentType ?? "application/json",
"Cache-Control": "no-store",
[authHeaderName]: authValue,
...(serverConfig.openaiOrgId && {

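The practical effect of this change: a multipart upload (such as a Whisper transcription request) is no longer force-parsed as JSON and re-labeled application/json; the original body and Content-Type header now pass through to the upstream API. A hedged sketch of the kind of client call this unblocks (the proxy path and form fields are illustrative assumptions, not from the diff):

// Hypothetical client call through the proxy; the browser sets the
// multipart/form-data Content-Type (with boundary) automatically.
const formData = new FormData();
formData.append("file", audioBlob, "speech.webm");
formData.append("model", "whisper-1");
const res = await fetch("/api/openai/v1/audio/transcriptions", {
  method: "POST",
  body: formData,
});
const { text } = await res.json();
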
View File

@@ -143,11 +143,12 @@ export class ChatGPTApi implements LLMApi {
try {
const path = this.path(OpenaiPath.TranscriptionPath, options.model);
const headers = getHeaders(true);
const payload = {
method: "POST",
body: formData,
signal: controller.signal,
headers: getHeaders(true),
headers: headers,
};
// make a fetch request
@@ -155,7 +156,6 @@ export class ChatGPTApi implements LLMApi {
() => controller.abort(),
REQUEST_TIMEOUT_MS,
);
const res = await fetch(path, payload);
clearTimeout(requestTimeoutId);
const json = await res.json();

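The hunk above wires the transcription request into the abort-on-timeout pattern used elsewhere in this client. A minimal self-contained sketch of that pattern, assuming REQUEST_TIMEOUT_MS and a prepared formData as in the diff:

// Minimal sketch: abort the fetch if it exceeds the timeout, and always
// clear the timer once a response (or an error) comes back.
const controller = new AbortController();
const requestTimeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
try {
  const res = await fetch(path, {
    method: "POST",
    body: formData,
    signal: controller.signal,
    headers,
  });
  const json = await res.json();
  // ...consume json.text here
} finally {
  clearTimeout(requestTimeoutId);
}
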
View File

@@ -65,6 +65,16 @@
align-items: center;
}
.icon-button-loading-icon {
width: 40px;
height: 16px;
display: flex;
align-items: center;
justify-content: center;
fill: white;
stroke: white;
}
@media only screen and (max-width: 600px) {
.icon-button {
padding: 16px;

View File

@@ -4,6 +4,8 @@ import styles from "./button.module.scss";
export type ButtonType = "primary" | "danger" | null;
import LoadingIcon from "../icons/three-dots-white.svg";
export function IconButton(props: {
onClick?: () => void;
icon?: JSX.Element;
@@ -16,6 +18,7 @@ export function IconButton(props: {
disabled?: boolean;
tabIndex?: number;
autoFocus?: boolean;
loading?: boolean;
}) {
return (
<button
@@ -32,7 +35,7 @@ export function IconButton(props: {
tabIndex={props.tabIndex}
autoFocus={props.autoFocus}
>
{props.icon && (
{props.icon && !props.loading && (
<div
className={
styles["icon-button-icon"] +
@@ -43,9 +46,19 @@ export function IconButton(props: {
</div>
)}
{props.text && (
{props.text && !props.loading && (
<div className={styles["icon-button-text"]}>{props.text}</div>
)}
{props.loading ? (
<div
className={
styles["icon-button-loading-icon"] +
` ${props.type === "primary" ? "no-dark" : ""}`
}
>
<LoadingIcon />
</div>
) : null}
</button>
);
}

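Taken together, the new prop swaps both the icon and the text for the animated three-dots SVG while a request is in flight. A hypothetical usage (VoiceIcon and the two handlers are placeholders, not part of the commit):

// While isTranscription is true, the icon and text are hidden and the
// white three-dots loading animation is rendered instead.
<IconButton
  icon={<VoiceIcon />}
  text="Record"
  type="primary"
  loading={isTranscription}
  onClick={async () =>
    isListening ? await stopListening() : await startListening()
  }
/>
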
View File

@@ -91,6 +91,7 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
DEFAULT_STT_ENGINE,
LAST_INPUT_KEY,
ModelProvider,
Path,
@@ -806,10 +807,10 @@ function _Chat() {
};
const [isListening, setIsListening] = useState(false);
const [isTranscription, setIsTranscription] = useState(false);
const [speechApi, setSpeechApi] = useState<any>(null);
const startListening = async () => {
console.log(speechApi);
if (speechApi) {
await speechApi.start();
setIsListening(true);
@@ -818,6 +819,8 @@ function _Chat() {
const stopListening = async () => {
if (speechApi) {
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(true);
await speechApi.stop();
setIsListening(false);
}
@@ -826,6 +829,8 @@ function _Chat() {
const onRecognitionEnd = (finalTranscript: string) => {
console.log(finalTranscript);
if (finalTranscript) setUserInput(finalTranscript);
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(false);
};
const doSubmit = (userInput: string) => {
@@ -899,9 +904,13 @@ function _Chat() {
});
// eslint-disable-next-line react-hooks/exhaustive-deps
setSpeechApi(
new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
config.sttConfig.engine === DEFAULT_STT_ENGINE
? new WebTranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
)
: new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
);
}, []);
@@ -1695,6 +1704,7 @@ function _Chat() {
onClick={async () =>
isListening ? await stopListening() : await startListening()
}
loading={isTranscription}
/>
) : (
<IconButton

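The mount-time effect now picks the transcription backend from the config: the default "WebAPI" engine uses the browser's built-in recognition and delivers its result immediately, while the OpenAI Whisper engine records audio and needs a server round-trip, which is why isTranscription (and hence the button's loading state) is only toggled for the non-default engine. A condensed sketch of the dispatch, with names as in the diff:

// Choose the speech backend once on mount; both implementations share the
// same callback-based surface.
const api =
  config.sttConfig.engine === DEFAULT_STT_ENGINE // "WebAPI"
    ? new WebTranscriptionApi((t) => onRecognitionEnd(t)) // in-browser, instant result
    : new OpenAITranscriptionApi((t) => onRecognitionEnd(t)); // Whisper, server round-trip
setSpeechApi(api);
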
View File

@@ -1,7 +1,8 @@
import { STTConfig } from "../store";
import { STTConfig, STTConfigValidator } from "../store";
import Locale from "../locales";
import { ListItem } from "./ui-lib";
import { ListItem, Select } from "./ui-lib";
import { DEFAULT_STT_ENGINES } from "../constant";
export function STTConfigList(props: {
sttConfig: STTConfig;
@@ -23,6 +24,25 @@ export function STTConfigList(props: {
}
></input>
</ListItem>
<ListItem title={Locale.Settings.STT.Engine.Title}>
<Select
value={props.sttConfig.engine}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.engine = STTConfigValidator.engine(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_STT_ENGINES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
</>
);
}

View File

@@ -134,6 +134,9 @@ export const DEFAULT_TTS_VOICES = [
"shimmer",
];
export const DEFAULT_STT_ENGINE = "WebAPI";
export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"];
export const DEFAULT_MODELS = [
{
name: "gpt-4",

View File

@@ -0,0 +1,14 @@
<svg xmlns="http://www.w3.org/2000/svg" width="30" height="14" fill="#fff" viewBox="0 0 120 30">
<circle cx="15" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
<circle cx="60" cy="15" r="9" fill="#fff" fill-opacity=".3">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="9" repeatCount="indefinite" to="9" values="9;15;9" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from=".5" repeatCount="indefinite" to=".5" values=".5;1;.5" />
</circle>
<circle cx="105" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
</svg>

View File

@@ -402,6 +402,10 @@ const cn = {
Title: "启用语音转文本",
SubTitle: "启用语音转文本",
},
Engine: {
Title: "转换引擎",
SubTitle: "音频转换引擎",
},
},
},
Store: {

View File

@@ -408,6 +408,10 @@ const en: LocaleType = {
Title: "Enable STT",
SubTitle: "Enable Speech-to-Text",
},
Engine: {
Title: "STT Engine",
SubTitle: "Text-to-Speech Engine",
},
},
},
Store: {

View File

@@ -5,6 +5,8 @@ import {
DEFAULT_INPUT_TEMPLATE,
DEFAULT_MODELS,
DEFAULT_SIDEBAR_WIDTH,
DEFAULT_STT_ENGINE,
DEFAULT_STT_ENGINES,
DEFAULT_TTS_MODEL,
DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICE,
@@ -17,6 +19,8 @@ export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number];
export enum SubmitKey {
Enter = "Enter",
CtrlEnter = "Ctrl + Enter",
@@ -81,6 +85,7 @@ export const DEFAULT_CONFIG = {
sttConfig: {
enable: false,
engine: DEFAULT_STT_ENGINE,
},
};
@@ -116,6 +121,12 @@ export const TTSConfigValidator = {
},
};
export const STTConfigValidator = {
engine(x: string) {
return x as STTEngineType;
},
};
export const ModalConfigValidator = {
model(x: string) {
return x as ModelType;

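Note that STTConfigValidator.engine is a plain type assertion: any string passed in is cast to STTEngineType unchecked. A stricter variant (an assumption for illustration, not part of the commit) could fall back to the default for unknown values:

// Hypothetical hardened validator: only accept known engines, otherwise
// fall back to DEFAULT_STT_ENGINE.
export const STTConfigValidator = {
  engine(x: string): STTEngineType {
    return (DEFAULT_STT_ENGINES as readonly string[]).includes(x)
      ? (x as STTEngineType)
      : DEFAULT_STT_ENGINE;
  },
};
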
View File

@@ -31,7 +31,7 @@ export class OpenAITranscriptionApi extends SpeechApi {
}
async start(): Promise<void> {
// @ts-ignore
// @ts-ignore prettier-ignore
navigator.getUserMedia =
navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
@@ -103,20 +103,18 @@ export class WebTranscriptionApi extends SpeechApi {
this.recognitionInstance.onresult = (event: any) => {
const result = event.results[event.results.length - 1];
if (result.isFinal) {
if (!this.isListening) {
this.onTranscriptionReceived(result[0].transcript);
}
this.onTranscription(result[0].transcript);
}
};
}
async start(): Promise<void> {
await this.recognitionInstance.start();
this.listeningStatus = true;
await this.recognitionInstance.start();
}
async stop(): Promise<void> {
await this.recognitionInstance.stop();
this.listeningStatus = false;
await this.recognitionInstance.stop();
}
}
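
Both transcription classes expose the same start/stop surface and report the final text through the constructor callback; the reordering above flips listeningStatus before awaiting the engine so the status is already correct while start/stop are still in flight. A minimal usage sketch (error handling omitted):

// Start listening, let the user speak, then stop; the final transcript
// arrives via the callback passed to the constructor.
const stt = new WebTranscriptionApi((transcript) => {
  console.log("final transcript:", transcript);
});
await stt.start();
// ...user speaks...
await stt.stop();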