Hk-Gosuto 2024-03-24 11:42:06 +08:00
parent 428bf81801
commit a18cb2c525
12 changed files with 112 additions and 18 deletions

View File

@@ -67,9 +67,16 @@ export async function requestOpenai(req: NextRequest) {
let jsonBody;
let clonedBody;
if (req.method !== "GET" && req.method !== "HEAD") {
const contentType = req.headers.get("Content-Type");
if (
req.method !== "GET" &&
req.method !== "HEAD" &&
contentType?.includes("json")
) {
clonedBody = await req.text();
jsonBody = JSON.parse(clonedBody) as { model?: string };
} else {
clonedBody = req.body;
}
if (serverConfig.isAzure) {
baseUrl = `${baseUrl}/${jsonBody?.model}`;
@@ -77,7 +84,7 @@ export async function requestOpenai(req: NextRequest) {
const fetchUrl = `${baseUrl}/${path}`;
const fetchOptions: RequestInit = {
headers: {
"Content-Type": "application/json",
"Content-Type": contentType ?? "application/json",
"Cache-Control": "no-store",
[authHeaderName]: authValue,
...(serverConfig.openaiOrgId && {

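The practical effect of this change: a multipart upload (such as a Whisper transcription request) is no longer force-parsed as JSON and re-labeled application/json; the original body and Content-Type header now pass through to the upstream API. A hedged sketch of the kind of client call this unblocks (the proxy path and form fields are illustrative assumptions, not from the diff):

// Hypothetical client call through the proxy; the browser sets the
// multipart/form-data Content-Type (with boundary) automatically.
const formData = new FormData();
formData.append("file", audioBlob, "speech.webm");
formData.append("model", "whisper-1");
const res = await fetch("/api/openai/v1/audio/transcriptions", {
  method: "POST",
  body: formData,
});
const { text } = await res.json();
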
View File

@@ -143,11 +143,12 @@ export class ChatGPTApi implements LLMApi {
try {
const path = this.path(OpenaiPath.TranscriptionPath, options.model);
const headers = getHeaders(true);
const payload = {
method: "POST",
body: formData,
signal: controller.signal,
headers: getHeaders(true),
headers: headers,
};
// make a fetch request
@@ -155,7 +156,6 @@ export class ChatGPTApi implements LLMApi {
() => controller.abort(),
REQUEST_TIMEOUT_MS,
);
const res = await fetch(path, payload);
clearTimeout(requestTimeoutId);
const json = await res.json();

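The hunk above wires the transcription request into the abort-on-timeout pattern used elsewhere in this client. A minimal self-contained sketch of that pattern, assuming REQUEST_TIMEOUT_MS and a prepared formData as in the diff:

// Minimal sketch: abort the fetch if it exceeds the timeout, and always
// clear the timer once a response (or an error) comes back.
const controller = new AbortController();
const requestTimeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
try {
  const res = await fetch(path, {
    method: "POST",
    body: formData,
    signal: controller.signal,
    headers,
  });
  const json = await res.json();
  // ...consume json.text here
} finally {
  clearTimeout(requestTimeoutId);
}
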
View File

@@ -65,6 +65,16 @@
align-items: center;
}
.icon-button-loading-icon {
width: 40px;
height: 16px;
display: flex;
align-items: center;
justify-content: center;
fill: white;
stroke: white;
}
@media only screen and (max-width: 600px) {
.icon-button {
padding: 16px;

View File

@@ -4,6 +4,8 @@ import styles from "./button.module.scss";
export type ButtonType = "primary" | "danger" | null;
import LoadingIcon from "../icons/three-dots-white.svg";
export function IconButton(props: {
onClick?: () => void;
icon?: JSX.Element;
@@ -16,6 +18,7 @@ export function IconButton(props: {
disabled?: boolean;
tabIndex?: number;
autoFocus?: boolean;
loading?: boolean;
}) {
return (
<button
@@ -32,7 +35,7 @@ export function IconButton(props: {
tabIndex={props.tabIndex}
autoFocus={props.autoFocus}
>
{props.icon && (
{props.icon && !props.loading && (
<div
className={
styles["icon-button-icon"] +
@@ -43,9 +46,19 @@ export function IconButton(props: {
</div>
)}
{props.text && (
{props.text && !props.loading && (
<div className={styles["icon-button-text"]}>{props.text}</div>
)}
{props.loading ? (
<div
className={
styles["icon-button-loading-icon"] +
` ${props.type === "primary" ? "no-dark" : ""}`
}
>
<LoadingIcon />
</div>
) : null}
</button>
);
}

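Taken together, the new prop swaps both the icon and the text for the animated three-dots SVG while a request is in flight. A hypothetical usage (VoiceIcon and the two handlers are placeholders, not part of the commit):

// While isTranscription is true, the icon and text are hidden and the
// white three-dots loading animation is rendered instead.
<IconButton
  icon={<VoiceIcon />}
  text="Record"
  type="primary"
  loading={isTranscription}
  onClick={async () =>
    isListening ? await stopListening() : await startListening()
  }
/>
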
View File

@@ -91,6 +91,7 @@ import {
import { useNavigate } from "react-router-dom";
import {
CHAT_PAGE_SIZE,
DEFAULT_STT_ENGINE,
LAST_INPUT_KEY,
ModelProvider,
Path,
@@ -806,10 +807,10 @@ function _Chat() {
};
const [isListening, setIsListening] = useState(false);
const [isTranscription, setIsTranscription] = useState(false);
const [speechApi, setSpeechApi] = useState<any>(null);
const startListening = async () => {
console.log(speechApi);
if (speechApi) {
await speechApi.start();
setIsListening(true);
@@ -818,6 +819,8 @@ function _Chat() {
const stopListening = async () => {
if (speechApi) {
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(true);
await speechApi.stop();
setIsListening(false);
}
@@ -826,6 +829,8 @@ function _Chat() {
const onRecognitionEnd = (finalTranscript: string) => {
console.log(finalTranscript);
if (finalTranscript) setUserInput(finalTranscript);
if (config.sttConfig.engine !== DEFAULT_STT_ENGINE)
setIsTranscription(false);
};
const doSubmit = (userInput: string) => {
@@ -899,9 +904,13 @@ function _Chat() {
});
// eslint-disable-next-line react-hooks/exhaustive-deps
setSpeechApi(
new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
config.sttConfig.engine === DEFAULT_STT_ENGINE
? new WebTranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
)
: new OpenAITranscriptionApi((transcription) =>
onRecognitionEnd(transcription),
),
);
}, []);
@@ -1695,6 +1704,7 @@ function _Chat() {
onClick={async () =>
isListening ? await stopListening() : await startListening()
}
loading={isTranscription}
/>
) : (
<IconButton

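The mount-time effect now picks the transcription backend from the config: the default "WebAPI" engine uses the browser's built-in recognition and delivers its result immediately, while the OpenAI Whisper engine records audio and needs a server round-trip, which is why isTranscription (and hence the button's loading state) is only toggled for the non-default engine. A condensed sketch of the dispatch, with names as in the diff:

// Choose the speech backend once on mount; both implementations share the
// same callback-based surface.
const api =
  config.sttConfig.engine === DEFAULT_STT_ENGINE // "WebAPI"
    ? new WebTranscriptionApi((t) => onRecognitionEnd(t)) // in-browser, instant result
    : new OpenAITranscriptionApi((t) => onRecognitionEnd(t)); // Whisper, server round-trip
setSpeechApi(api);
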
View File

@@ -1,7 +1,8 @@
import { STTConfig } from "../store";
import { STTConfig, STTConfigValidator } from "../store";
import Locale from "../locales";
import { ListItem } from "./ui-lib";
import { ListItem, Select } from "./ui-lib";
import { DEFAULT_STT_ENGINES } from "../constant";
export function STTConfigList(props: {
sttConfig: STTConfig;
@@ -23,6 +24,25 @@ export function STTConfigList(props: {
}
></input>
</ListItem>
<ListItem title={Locale.Settings.STT.Engine.Title}>
<Select
value={props.sttConfig.engine}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.engine = STTConfigValidator.engine(
e.currentTarget.value,
)),
);
}}
>
{DEFAULT_STT_ENGINES.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
</>
);
}

View File

@@ -134,6 +134,9 @@ export const DEFAULT_TTS_VOICES = [
"shimmer",
];
export const DEFAULT_STT_ENGINE = "WebAPI";
export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"];
export const DEFAULT_MODELS = [
{
name: "gpt-4",

View File

@@ -0,0 +1,14 @@
<svg xmlns="http://www.w3.org/2000/svg" width="30" height="14" fill="#fff" viewBox="0 0 120 30">
<circle cx="15" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
<circle cx="60" cy="15" r="9" fill="#fff" fill-opacity=".3">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="9" repeatCount="indefinite" to="9" values="9;15;9" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from=".5" repeatCount="indefinite" to=".5" values=".5;1;.5" />
</circle>
<circle cx="105" cy="15" r="15" fill="#fff">
<animate attributeName="r" begin="0s" calcMode="linear" dur="0.8s" from="15" repeatCount="indefinite" to="15" values="15;9;15" />
<animate attributeName="fill-opacity" begin="0s" calcMode="linear" dur="0.8s" from="1" repeatCount="indefinite" to="1" values="1;.5;1" />
</circle>
</svg>

View File

@@ -402,6 +402,10 @@ const cn = {
Title: "启用语音转文本",
SubTitle: "启用语音转文本",
},
Engine: {
Title: "转换引擎",
SubTitle: "音频转换引擎",
},
},
},
Store: {

View File

@@ -408,6 +408,10 @@ const en: LocaleType = {
Title: "Enable STT",
SubTitle: "Enable Speech-to-Text",
},
Engine: {
Title: "STT Engine",
SubTitle: "Text-to-Speech Engine",
},
},
},
Store: {

View File

@@ -5,6 +5,8 @@ import {
DEFAULT_INPUT_TEMPLATE,
DEFAULT_MODELS,
DEFAULT_SIDEBAR_WIDTH,
DEFAULT_STT_ENGINE,
DEFAULT_STT_ENGINES,
DEFAULT_TTS_MODEL,
DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICE,
@@ -17,6 +19,8 @@ export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number];
export enum SubmitKey {
Enter = "Enter",
CtrlEnter = "Ctrl + Enter",
@@ -81,6 +85,7 @@ export const DEFAULT_CONFIG = {
sttConfig: {
enable: false,
engine: DEFAULT_STT_ENGINE,
},
};
@@ -116,6 +121,12 @@ export const TTSConfigValidator = {
},
};
export const STTConfigValidator = {
engine(x: string) {
return x as STTEngineType;
},
};
export const ModalConfigValidator = {
model(x: string) {
return x as ModelType;

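Note that STTConfigValidator.engine is a plain type assertion: any string passed in is cast to STTEngineType unchecked. A stricter variant (an assumption for illustration, not part of the commit) could fall back to the default for unknown values:

// Hypothetical hardened validator: only accept known engines, otherwise
// fall back to DEFAULT_STT_ENGINE.
export const STTConfigValidator = {
  engine(x: string): STTEngineType {
    return (DEFAULT_STT_ENGINES as readonly string[]).includes(x)
      ? (x as STTEngineType)
      : DEFAULT_STT_ENGINE;
  },
};
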
View File

@@ -31,7 +31,7 @@ export class OpenAITranscriptionApi extends SpeechApi {
}
async start(): Promise<void> {
// @ts-ignore
// @ts-ignore prettier-ignore
navigator.getUserMedia =
navigator.getUserMedia ||
navigator.webkitGetUserMedia ||
@@ -103,20 +103,18 @@ export class WebTranscriptionApi extends SpeechApi {
this.recognitionInstance.onresult = (event: any) => {
const result = event.results[event.results.length - 1];
if (result.isFinal) {
if (!this.isListening) {
this.onTranscriptionReceived(result[0].transcript);
}
this.onTranscription(result[0].transcript);
}
};
}
async start(): Promise<void> {
await this.recognitionInstance.start();
this.listeningStatus = true;
await this.recognitionInstance.start();
}
async stop(): Promise<void> {
await this.recognitionInstance.stop();
this.listeningStatus = false;
await this.recognitionInstance.stop();
}
}
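
Both transcription classes expose the same start/stop surface and report the final text through the constructor callback; the reordering above flips listeningStatus before awaiting the engine so the status is already correct while start/stop are still in flight. A minimal usage sketch (error handling omitted):

// Start listening, let the user speak, then stop; the final transcript
// arrives via the callback passed to the constructor.
const stt = new WebTranscriptionApi((transcript) => {
  console.log("final transcript:", transcript);
});
await stt.start();
// ...user speaks...
await stt.stop();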