This commit is contained in:
Hk-Gosuto 2024-03-24 11:42:06 +08:00
parent 428bf81801
commit a18cb2c525
12 changed files with 112 additions and 18 deletions

View File

@ -67,9 +67,16 @@ export async function requestOpenai(req: NextRequest) {
let jsonBody; let jsonBody;
let clonedBody; let clonedBody;
if (req.method !== "GET" && req.method !== "HEAD") { const contentType = req.headers.get("Content-Type");
if (
req.method !== "GET" &&
req.method !== "HEAD" &&
contentType?.includes("json")
) {
clonedBody = await req.text(); clonedBody = await req.text();
jsonBody = JSON.parse(clonedBody) as { model?: string }; jsonBody = JSON.parse(clonedBody) as { model?: string };
} else {
clonedBody = req.body;
} }
if (serverConfig.isAzure) { if (serverConfig.isAzure) {
baseUrl = `${baseUrl}/${jsonBody?.model}`; baseUrl = `${baseUrl}/${jsonBody?.model}`;
@ -77,7 +84,7 @@ export async function requestOpenai(req: NextRequest) {
const fetchUrl = `${baseUrl}/${path}`; const fetchUrl = `${baseUrl}/${path}`;
const fetchOptions: RequestInit = { const fetchOptions: RequestInit = {
headers: { headers: {
"Content-Type": "application/json", "Content-Type": contentType ?? "application/json",
"Cache-Control": "no-store", "Cache-Control": "no-store",
[authHeaderName]: authValue, [authHeaderName]: authValue,
...(serverConfig.openaiOrgId && { ...(serverConfig.openaiOrgId && {

View File

@ -143,11 +143,12 @@ export class ChatGPTApi implements LLMApi {
try { try {
const path = this.path(OpenaiPath.TranscriptionPath, options.model); const path = this.path(OpenaiPath.TranscriptionPath, options.model);
const headers = getHeaders(true);
const payload = { const payload = {
method: "POST", method: "POST",
body: formData, body: formData,
signal: controller.signal, signal: controller.signal,
headers: getHeaders(true), headers: headers,
}; };
// make a fetch request // make a fetch request
@ -155,7 +156,6 @@ export class ChatGPTApi implements LLMApi {
() => controller.abort(), () => controller.abort(),
REQUEST_TIMEOUT_MS, REQUEST_TIMEOUT_MS,
); );
const res = await fetch(path, payload); const res = await fetch(path, payload);
clearTimeout(requestTimeoutId); clearTimeout(requestTimeoutId);
const json = await res.json(); const json = await res.json();

View File

@ -65,6 +65,16 @@
align-items: center; align-items: center;
} }
.icon-button-loading-icon {
width: 40px;
height: 16px;
display: flex;
align-items: center;
justify-content: center;
fill: white;
stroke: white;
}
@media only screen and (max-width: 600px) { @media only screen and (max-width: 600px) {
.icon-button { .icon-button {
padding: 16px; padding: 16px;

View File

@ -4,6 +4,8 @@ import styles from "./button.module.scss";
export type ButtonType = "primary" | "danger" | null; export type ButtonType = "primary" | "danger" | null;
import LoadingIcon from "../icons/three-dots-white.svg";
export function IconButton(props: { export function IconButton(props: {
onClick?: () => void; onClick?: () => void;
icon?: JSX.Element; icon?: JSX.Element;
@ -16,6 +18,7 @@ export function IconButton(props: {
disabled?: boolean; disabled?: boolean;
tabIndex?: number; tabIndex?: number;
autoFocus?: boolean; autoFocus?: boolean;
loding?: boolean;
}) { }) {
return ( return (
<button <button
@ -32,7 +35,7 @@ export function IconButton(props: {
tabIndex={props.tabIndex} tabIndex={props.tabIndex}
autoFocus={props.autoFocus} autoFocus={props.autoFocus}
> >
{props.icon && ( {props.icon && !props.loding && (
<div <div
className={ className={
styles["icon-button-icon"] + styles["icon-button-icon"] +
@ -43,9 +46,19 @@ export function IconButton(props: {
</div> </div>
)} )}
{props.text && ( {props.text && !props.loding && (
<div className={styles["icon-button-text"]}>{props.text}</div> <div className={styles["icon-button-text"]}>{props.text}</div>
)} )}
{props.loding ? (
<div
className={
styles["icon-button-loading-icon"] +
` ${props.type === "primary" && "no-dark"}`
}
>
<LoadingIcon />
</div>
) : null}
</button> </button>
); );
} }

View File

@ -91,6 +91,7 @@ import {
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import { import {
CHAT_PAGE_SIZE, CHAT_PAGE_SIZE,
DEFAULT_STT_ENGINE,
LAST_INPUT_KEY, LAST_INPUT_KEY,
ModelProvider, ModelProvider,
Path, Path,
@ -806,10 +807,10 @@ function _Chat() {
}; };
const [isListening, setIsListening] = useState(false); const [isListening, setIsListening] = useState(false);
const [isTranscription, setIsTranscription] = useState(false);
const [speechApi, setSpeechApi] = useState<any>(null); const [speechApi, setSpeechApi] = useState<any>(null);
const startListening = async () => { const startListening = async () => {
console.log(speechApi);
if (speechApi) { if (speechApi) {
await speechApi.start(); await speechApi.start();
setIsListening(true); setIsListening(true);
@ -818,6 +819,8 @@ function _Chat() {
const stopListening = async () => { const stopListening = async () => {
if (speechApi) { if (speechApi) {
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(true);
await speechApi.stop(); await speechApi.stop();
setIsListening(false); setIsListening(false);
} }
@ -826,6 +829,8 @@ function _Chat() {
const onRecognitionEnd = (finalTranscript: string) => { const onRecognitionEnd = (finalTranscript: string) => {
console.log(finalTranscript); console.log(finalTranscript);
if (finalTranscript) setUserInput(finalTranscript); if (finalTranscript) setUserInput(finalTranscript);
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(false);
}; };
const doSubmit = (userInput: string) => { const doSubmit = (userInput: string) => {
@ -899,9 +904,13 @@ function _Chat() {
}); });
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
setSpeechApi( setSpeechApi(
new OpenAITranscriptionApi((transcription) => config.sttConfig.engine === DEFAULT_STT_ENGINE
onRecognitionEnd(transcription), ? new WebTranscriptionApi((transcription) =>
), onRecognitionEnd(transcription),
)
: new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
); );
}, []); }, []);
@ -1695,6 +1704,7 @@ function _Chat() {
onClick={async () => onClick={async () =>
isListening ? await stopListening() : await startListening() isListening ? await stopListening() : await startListening()
} }
loding={isTranscription}
/> />
) : ( ) : (
<IconButton <IconButton

View File

@ -1,7 +1,8 @@
import { STTConfig } from "../store"; import { STTConfig, STTConfigValidator } from "../store";
import Locale from "../locales"; import Locale from "../locales";
import { ListItem } from "./ui-lib"; import { ListItem, Select } from "./ui-lib";
import { DEFAULT_STT_ENGINES } from "../constant";
export function STTConfigList(props: { export function STTConfigList(props: {
sttConfig: STTConfig; sttConfig: STTConfig;
@ -23,6 +24,25 @@ export function STTConfigList(props: {
} }
></input> ></input>
</ListItem> </ListItem>
<ListItem title={Locale.Settings.STT.Engine.Title}>
<Select
value={props.sttConfig.engine}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.engine = STTConfigValidator.engine(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_STT_ENGINES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
</> </>
); );
} }

View File

@ -134,6 +134,9 @@ export const DEFAULT_TTS_VOICES = [
"shimmer", "shimmer",
]; ];
export const DEFAULT_STT_ENGINE = "WebAPI";
export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"];
export const DEFAULT_MODELS = [ export const DEFAULT_MODELS = [
{ {
name: "gpt-4", name: "gpt-4",

View File

@ -0,0 +1,14 @@
<!-- three-dots-white.svg: three pulsing white dots rendered in a 120x30 viewBox
     (displayed at 30x14). Imported elsewhere in this commit as LoadingIcon and
     shown by IconButton while its `loding` (sic) prop is true. -->
<svg xmlns="http://www.w3.org/2000/svg" width="30" height="14" fill="#fff" viewBox="0 0 120 30">
<!-- Left dot: radius shrinks 15 -> 9 -> 15 while opacity dips 1 -> .5 -> 1 over a 0.8s loop. -->
<circle cx="15" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
<!-- Middle dot: animated out of phase with the outer two (starts small/dim,
     grows to full size), producing the rippling "typing" effect. -->
<circle cx="60" cy="15" r="9" fill="#fff" fill-opacity=".3">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="9" repeatCount="indefinite" to="9" values="9;15;9" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from=".5" repeatCount="indefinite" to=".5" values=".5;1;.5" />
</circle>
<!-- Right dot: same keyframes as the left dot. -->
<circle cx="105" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
</svg>

After

Width: 30  |  Height: 14  |  Size: 1.1 KiB

View File

@ -402,6 +402,10 @@ const cn = {
Title: "启用语音转文本", Title: "启用语音转文本",
SubTitle: "启用语音转文本", SubTitle: "启用语音转文本",
}, },
Engine: {
Title: "转换引擎",
SubTitle: "音频转换引擎",
},
}, },
}, },
Store: { Store: {

View File

@ -408,6 +408,10 @@ const en: LocaleType = {
Title: "Enable STT", Title: "Enable STT",
SubTitle: "Enable Speech-to-Text", SubTitle: "Enable Speech-to-Text",
}, },
Engine: {
Title: "STT Engine",
SubTitle: "Text-to-Speech Engine",
},
}, },
}, },
Store: { Store: {

View File

@ -5,6 +5,8 @@ import {
DEFAULT_INPUT_TEMPLATE, DEFAULT_INPUT_TEMPLATE,
DEFAULT_MODELS, DEFAULT_MODELS,
DEFAULT_SIDEBAR_WIDTH, DEFAULT_SIDEBAR_WIDTH,
DEFAULT_STT_ENGINE,
DEFAULT_STT_ENGINES,
DEFAULT_TTS_MODEL, DEFAULT_TTS_MODEL,
DEFAULT_TTS_MODELS, DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICE, DEFAULT_TTS_VOICE,
@ -17,6 +19,8 @@ export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number]; export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number]; export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number];
export enum SubmitKey { export enum SubmitKey {
Enter = "Enter", Enter = "Enter",
CtrlEnter = "Ctrl + Enter", CtrlEnter = "Ctrl + Enter",
@ -81,6 +85,7 @@ export const DEFAULT_CONFIG = {
sttConfig: { sttConfig: {
enable: false, enable: false,
engine: DEFAULT_STT_ENGINE,
}, },
}; };
@ -116,6 +121,12 @@ export const TTSConfigValidator = {
}, },
}; };
export const STTConfigValidator = {
engine(x: string) {
return x as STTEngineType;
},
};
export const ModalConfigValidator = { export const ModalConfigValidator = {
model(x: string) { model(x: string) {
return x as ModelType; return x as ModelType;

View File

@ -31,7 +31,7 @@ export class OpenAITranscriptionApi extends SpeechApi {
} }
async start(): Promise<void> { async start(): Promise<void> {
// @ts-ignore // @ts-ignore prettier-ignore
navigator.getUserMedia = navigator.getUserMedia =
navigator.getUserMedia || navigator.getUserMedia ||
navigator.webkitGetUserMedia || navigator.webkitGetUserMedia ||
@ -103,20 +103,18 @@ export class WebTranscriptionApi extends SpeechApi {
this.recognitionInstance.onresult = (event: any) => { this.recognitionInstance.onresult = (event: any) => {
const result = event.results[event.results.length - 1]; const result = event.results[event.results.length - 1];
if (result.isFinal) { if (result.isFinal) {
if (!this.isListening) { this.onTranscription(result[0].transcript);
this.onTranscriptionReceived(result[0].transcript);
}
} }
}; };
} }
async start(): Promise<void> { async start(): Promise<void> {
await this.recognitionInstance.start();
this.listeningStatus = true; this.listeningStatus = true;
await this.recognitionInstance.start();
} }
async stop(): Promise<void> { async stop(): Promise<void> {
await this.recognitionInstance.stop();
this.listeningStatus = false; this.listeningStatus = false;
await this.recognitionInstance.stop();
} }
} }