Merge pull request #5786 from ConnectAI-E/feature/realtime-chat

Feature/realtime chat
2025-05-19 04:00:16 +09:00 · 2024-11-11 13:19:26 +08:00 · 2024-11-11 13:19:26 +08:00 · 289aeec8af
commit 289aeec8af
parent f8f6954115 7d71da938f
24 changed files with 1937 additions and 634 deletions
--- a/README.md
+++ b/README.md
@ -96,10 +96,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: Easily preview, copy and share generated content/webpages through a separate window [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
 - [x] Plugins: support network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
  - [x] network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] Supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - [ ] local knowledge base

 ## What's New
-
+- 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 Now supports  Artifacts & SD 
@ -134,10 +135,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: 通过独立窗口，轻松预览、复制和分享生成的内容/可交互网页 [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
 - [x] 插件机制，支持`联网搜索`、`计算器`、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
   - [x] 支持联网搜索、计算器、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] 支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - [ ] 本地知识库

 ## 最新动态
-
+- 🚀 v2.15.8 现在支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 客户端支持Tauri本地直接调用大模型API，更安全！[#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 现在支持插件功能了！了解更多：[NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 现在支持 Artifacts & SD 了。
--- a/app/components/chat.module.scss
+++ b/app/components/chat.module.scss
@ -45,6 +45,14 @@
 .chat-input-actions {
  display: flex;
  flex-wrap: wrap;
+  justify-content: space-between;
+  gap: 5px;
+
+  &-end {
+    display: flex;
+    margin-left: auto;
+    gap: 5px;
+  }

  .chat-input-action {
    display: inline-flex;
@ -62,10 +70,6 @@
    width: var(--icon-width);
    overflow: hidden;

-    &:not(:last-child) {
-      margin-right: 5px;
-    }
-
    .text {
      white-space: nowrap;
      padding-left: 5px;
@ -231,10 +235,12 @@

  animation: slide-in ease 0.3s;

-  $linear: linear-gradient(to right,
-      rgba(0, 0, 0, 0),
-      rgba(0, 0, 0, 1),
-      rgba(0, 0, 0, 0));
+  $linear: linear-gradient(
+    to right,
+    rgba(0, 0, 0, 0),
+    rgba(0, 0, 0, 1),
+    rgba(0, 0, 0, 0)
+  );
  mask-image: $linear;

  @mixin show {
@ -373,7 +379,7 @@
  }
 }

-.chat-message-user>.chat-message-container {
+.chat-message-user > .chat-message-container {
  align-items: flex-end;
 }

@ -443,6 +449,25 @@
  transition: all ease 0.3s;
 }

+.chat-message-audio { /* flex row styled around a nested <audio> element */
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  border-radius: 10px;
+  background-color: rgba(0, 0, 0, 0.05);
+  border: var(--border-in-light);
+  position: relative;
+  transition: all ease 0.3s;
+  margin-top: 10px;
+  font-size: 14px;
+  user-select: text;
+  word-break: break-word;
+  box-sizing: border-box;
+  audio {
+    height: 30px; /* adjust the player height */
+  }
+}
+
 .chat-message-item-image {
  width: 100%;
  margin-top: 10px;
@ -471,23 +496,27 @@
  border: rgba($color: #888, $alpha: 0.2) 1px solid;
 }

-
@media only screen and (max-width: 600px) {
-  $calc-image-width: calc(100vw/3*2/var(--image-count));
+  $calc-image-width: calc(100vw / 3 * 2 / var(--image-count));

  .chat-message-item-image-multi {
    width: $calc-image-width;
    height: $calc-image-width;
  }
-  
+
  .chat-message-item-image {
-    max-width: calc(100vw/3*2);
+    max-width: calc(100vw / 3 * 2);
  }
 }

@media screen and (min-width: 600px) {
-  $max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count));
-  $image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count));
+  $max-image-width: calc(
+    calc(1200px - var(--sidebar-width)) / 3 * 2 / var(--image-count)
+  );
+  $image-width: calc(
+    calc(var(--window-width) - var(--sidebar-width)) / 3 * 2 /
+      var(--image-count)
+  );

  .chat-message-item-image-multi {
    width: $image-width;
@ -497,7 +526,7 @@
  }

  .chat-message-item-image {
-    max-width: calc(calc(1200px - var(--sidebar-width))/3*2);
+    max-width: calc(calc(1200px - var(--sidebar-width)) / 3 * 2);
  }
 }

@ -515,7 +544,7 @@
  z-index: 1;
 }

-.chat-message-user>.chat-message-container>.chat-message-item {
+.chat-message-user > .chat-message-container > .chat-message-item {
  background-color: var(--second);

  &:hover {
@ -626,7 +655,8 @@
  min-height: 68px;
 }

-.chat-input:focus {}
+.chat-input:focus {
+}

 .chat-input-send {
  background-color: var(--primary);
@ -693,4 +723,31 @@
 .shortcut-key span {
  font-size: 12px;
  color: var(--black);
-}
+}
+
+.chat-main { /* flex container for the chat body plus an overlay side panel */
+  display: flex;
+  height: 100%;
+  width: 100%;
+  position: relative; /* positioning context for the absolutely placed side panel */
+  overflow: hidden; /* clips the side panel while it is translated off-screen */
+  .chat-body-container {
+    height: 100%;
+    display: flex;
+    flex-direction: column;
+    flex: 1;
+    width: 100%;
+  }
+  .chat-side-panel { /* covers the whole .chat-main area (inset: 0) */
+    position: absolute;
+    inset: 0;
+    background: var(--white);
+    overflow: hidden;
+    z-index: 10;
+    transform: translateX(100%); /* parked fully to the right by default */
+    transition: all ease 0.3s;
+    &-show { /* compiles to .chat-side-panel-show: slides the panel into view */
+      transform: translateX(0);
+    }
+  }
+}
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
--- a/app/components/realtime-chat/index.ts
+++ b/app/components/realtime-chat/index.ts
@ -0,0 +1 @@
+export * from "./realtime-chat";
--- a/app/components/realtime-chat/realtime-chat.module.scss
+++ b/app/components/realtime-chat/realtime-chat.module.scss
@ -0,0 +1,74 @@
+.realtime-chat { // root: centered flex column filling its parent
+  width: 100%;
+  justify-content: center;
+  align-items: center;
+  position: relative; // positioning context for .bottom-icons
+  display: flex;
+  flex-direction: column;
+  height: 100%;
+  padding: 20px;
+  box-sizing: border-box;
+  .circle-mic { // 150px round gradient badge with centered content
+    width: 150px;
+    height: 150px;
+    border-radius: 50%;
+    background: linear-gradient(to bottom right, #a0d8ef, #f0f8ff);
+    display: flex;
+    justify-content: center;
+    align-items: center;
+  }
+  .icon-center {
+    font-size: 24px;
+  }
+
+  .bottom-icons { // full-width control row pinned 20px above the bottom edge
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    width: 100%;
+    position: absolute;
+    bottom: 20px;
+    box-sizing: border-box;
+    padding: 0 20px;
+  }
+
+  .icon-left,
+  .icon-right { // 46px circular, clickable icon holders
+    width: 46px;
+    height: 46px;
+    font-size: 36px;
+    background: var(--second);
+    border-radius: 50%;
+    padding: 2px;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    cursor: pointer;
+    &:hover { // dim slightly on hover
+      opacity: 0.8;
+    }
+  }
+
+  &.mobile { // hidden entirely when the root also carries .mobile
+    display: none;
+  }
+}
+
+.pulse { // loops the pulse keyframes below every 1.5s, indefinitely
+  animation: pulse 1.5s infinite;
+}
+
+@keyframes pulse { // grow to 110% at full opacity mid-cycle, then return
+  0% {
+    transform: scale(1);
+    opacity: 0.7;
+  }
+  50% {
+    transform: scale(1.1);
+    opacity: 1;
+  }
+  100% {
+    transform: scale(1);
+    opacity: 0.7;
+  }
+}
--- a/app/components/realtime-chat/realtime-chat.tsx
+++ b/app/components/realtime-chat/realtime-chat.tsx
@ -0,0 +1,359 @@
+import VoiceIcon from "@/app/icons/voice.svg";
+import VoiceOffIcon from "@/app/icons/voice-off.svg";
+import PowerIcon from "@/app/icons/power.svg";
+
+import styles from "./realtime-chat.module.scss";
+import clsx from "clsx";
+
+import { useState, useRef, useEffect } from "react";
+
+import { useChatStore, createMessage, useAppConfig } from "@/app/store";
+
+import { IconButton } from "@/app/components/button";
+
+import {
+  Modality,
+  RTClient,
+  RTInputAudioItem,
+  RTResponse,
+  TurnDetection,
+} from "rt-client";
+import { AudioHandler } from "@/app/lib/audio";
+import { uploadImage } from "@/app/utils/chat";
+import { VoicePrint } from "@/app/components/voice-print";
+
+interface RealtimeChatProps {
+  onClose?: () => void;
+  onStartVoice?: () => void;
+  onPausedVoice?: () => void;
+}
+
+/**
+ * Voice conversation panel driven by the realtime API client.
+ *
+ * On mount it initializes an `AudioHandler`, connects an `RTClient` (Azure or
+ * OpenAI, chosen by `config.realtimeConfig.provider`) and starts recording.
+ * User transcriptions and assistant replies are appended to the current chat
+ * session; recorded and played audio is uploaded and attached as `audio_url`.
+ *
+ * @param onClose Called first when the power button is pressed.
+ * @param onStartVoice Part of the props contract; not referenced in this body.
+ * @param onPausedVoice Part of the props contract; not referenced in this body.
+ */
+export function RealtimeChat({
+  onClose,
+  onStartVoice,
+  onPausedVoice,
+}: RealtimeChatProps) {
+  const chatStore = useChatStore();
+  const session = chatStore.currentSession();
+  const config = useAppConfig();
+  const [status, setStatus] = useState("");
+  const [isRecording, setIsRecording] = useState(false);
+  const [isConnected, setIsConnected] = useState(false);
+  const [isConnecting, setIsConnecting] = useState(false);
+  const [modality, setModality] = useState("audio");
+  const [useVAD, setUseVAD] = useState(true);
+  const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();
+
+  const clientRef = useRef<RTClient | null>(null);
+  const audioHandlerRef = useRef<AudioHandler | null>(null);
+  const initRef = useRef(false);
+  // Mirrors `isRecording` so callbacks created during the first render (the
+  // mount effect and its cleanup) can still observe the latest value.
+  const isRecordingRef = useRef(false);
+
+  const temperature = config.realtimeConfig.temperature;
+  const apiKey = config.realtimeConfig.apiKey;
+  const model = config.realtimeConfig.model;
+  const azure = config.realtimeConfig.provider === "Azure";
+  const azureEndpoint = config.realtimeConfig.azure.endpoint;
+  const azureDeployment = config.realtimeConfig.azure.deployment;
+  const voice = config.realtimeConfig.voice;
+
+  // Connects when disconnected, disconnects when connected.
+  const handleConnect = async () => {
+    if (isConnecting) return;
+    if (!isConnected) {
+      try {
+        setIsConnecting(true);
+        clientRef.current = azure
+          ? new RTClient(
+              new URL(azureEndpoint),
+              { key: apiKey },
+              { deployment: azureDeployment },
+            )
+          : new RTClient({ key: apiKey }, { model });
+        const modalities: Modality[] =
+          modality === "audio" ? ["text", "audio"] : ["text"];
+        const turnDetection: TurnDetection = useVAD
+          ? { type: "server_vad" }
+          : null;
+        await clientRef.current.configure({
+          instructions: "",
+          voice,
+          input_audio_transcription: { model: "whisper-1" },
+          turn_detection: turnDetection,
+          tools: [],
+          temperature,
+          modalities,
+        });
+        // Deliberately not awaited: the listener loops for the lifetime of the
+        // connection and handles its own errors.
+        void startResponseListener();
+
+        setIsConnected(true);
+        // TODO
+        // try {
+        //   const recentMessages = chatStore.getMessagesWithMemory();
+        //   for (const message of recentMessages) {
+        //     const { role, content } = message;
+        //     if (typeof content === "string") {
+        //       await clientRef.current.sendItem({
+        //         type: "message",
+        //         role: role as any,
+        //         content: [
+        //           {
+        //             type: (role === "assistant" ? "text" : "input_text") as any,
+        //             text: content as string,
+        //           },
+        //         ],
+        //       });
+        //     }
+        //   }
+        //   // await clientRef.current.generateResponse();
+        // } catch (error) {
+        //   console.error("Set message failed:", error);
+        // }
+      } catch (error) {
+        console.error("Connection failed:", error);
+        setStatus("Connection failed");
+        // Drop the half-initialized client so later calls (e.g. the initial
+        // toggleRecording) do not stream audio to a dead connection.
+        const failedClient = clientRef.current;
+        clientRef.current = null;
+        try {
+          await failedClient?.close();
+        } catch (closeError) {
+          console.error("Disconnect failed:", closeError);
+        }
+      } finally {
+        setIsConnecting(false);
+      }
+    } else {
+      await disconnect();
+    }
+  };
+
+  // Closes the client, if any, and marks the panel as disconnected.
+  const disconnect = async () => {
+    if (clientRef.current) {
+      try {
+        await clientRef.current.close();
+        clientRef.current = null;
+        setIsConnected(false);
+      } catch (error) {
+        console.error("Disconnect failed:", error);
+      }
+    }
+  };
+
+  // Dispatches server events until the event stream ends or errors.
+  const startResponseListener = async () => {
+    if (!clientRef.current) return;
+
+    try {
+      for await (const serverEvent of clientRef.current.events()) {
+        if (serverEvent.type === "response") {
+          await handleResponse(serverEvent);
+        } else if (serverEvent.type === "input_audio") {
+          await handleInputAudio(serverEvent);
+        }
+      }
+    } catch (error) {
+      // Only report when the client is still set; after a deliberate
+      // disconnect the ref is null and the error is not logged.
+      if (clientRef.current) {
+        console.error("Response iteration error:", error);
+      }
+    }
+  };
+
+  // Streams one assistant response into a new bot message (text and audio).
+  const handleResponse = async (response: RTResponse) => {
+    for await (const item of response) {
+      if (item.type === "message" && item.role === "assistant") {
+        const botMessage = createMessage({
+          role: item.role,
+          content: "",
+        });
+        // add bot message first
+        chatStore.updateTargetSession(session, (session) => {
+          session.messages = session.messages.concat([botMessage]);
+        });
+        let hasAudio = false;
+        for await (const content of item) {
+          if (content.type === "text") {
+            for await (const text of content.textChunks()) {
+              botMessage.content += text;
+            }
+          } else if (content.type === "audio") {
+            const textTask = async () => {
+              for await (const text of content.transcriptChunks()) {
+                botMessage.content += text;
+              }
+            };
+            const audioTask = async () => {
+              audioHandlerRef.current?.startStreamingPlayback();
+              for await (const audio of content.audioChunks()) {
+                hasAudio = true;
+                audioHandlerRef.current?.playChunk(audio);
+              }
+            };
+            await Promise.all([textTask(), audioTask()]);
+          }
+          // update message.content
+          chatStore.updateTargetSession(session, (session) => {
+            session.messages = session.messages.concat();
+          });
+        }
+        if (hasAudio) {
+          // upload audio get audio_url
+          const blob = audioHandlerRef.current?.savePlayFile();
+          // The handler may already be gone (e.g. panel closed mid-response).
+          if (blob) {
+            uploadImage(blob)
+              .then((audio_url) => {
+                botMessage.audio_url = audio_url;
+                // update text and audio_url
+                chatStore.updateTargetSession(session, (session) => {
+                  session.messages = session.messages.concat();
+                });
+              })
+              .catch((error) => {
+                console.error("Upload response audio failed:", error);
+              });
+          }
+        }
+      }
+    }
+  };
+
+  // Records a finished user utterance as a user message with its audio clip.
+  const handleInputAudio = async (item: RTInputAudioItem) => {
+    await item.waitForCompletion();
+    if (item.transcription) {
+      const userMessage = createMessage({
+        role: "user",
+        content: item.transcription,
+      });
+      chatStore.updateTargetSession(session, (session) => {
+        session.messages = session.messages.concat([userMessage]);
+      });
+      // save input audio_url, and update session
+      const { audioStartMillis, audioEndMillis } = item;
+      // upload audio get audio_url
+      const blob = audioHandlerRef.current?.saveRecordFile(
+        audioStartMillis,
+        audioEndMillis,
+      );
+      if (blob) {
+        uploadImage(blob)
+          .then((audio_url) => {
+            userMessage.audio_url = audio_url;
+            chatStore.updateTargetSession(session, (session) => {
+              session.messages = session.messages.concat();
+            });
+          })
+          .catch((error) => {
+            console.error("Upload input audio failed:", error);
+          });
+      }
+    }
+    // stop streaming play after get input audio.
+    audioHandlerRef.current?.stopStreamingPlayback();
+  };
+
+  // Starts recording when idle, stops it when recording.
+  const toggleRecording = async () => {
+    if (!isRecordingRef.current) {
+      // Without a live client there is nowhere to stream audio to; do nothing
+      // rather than falling through to the stop branch.
+      if (!clientRef.current) return;
+      try {
+        if (!audioHandlerRef.current) {
+          audioHandlerRef.current = new AudioHandler();
+          await audioHandlerRef.current.initialize();
+        }
+        await audioHandlerRef.current.startRecording(async (chunk) => {
+          await clientRef.current?.sendAudio(chunk);
+        });
+        isRecordingRef.current = true;
+        setIsRecording(true);
+      } catch (error) {
+        console.error("Failed to start recording:", error);
+      }
+    } else if (audioHandlerRef.current) {
+      try {
+        audioHandlerRef.current.stopRecording();
+        if (!useVAD) {
+          // Without server-side VAD the turn has to be committed manually.
+          const inputAudio = await clientRef.current?.commitAudio();
+          await handleInputAudio(inputAudio!);
+          await clientRef.current?.generateResponse();
+        }
+        isRecordingRef.current = false;
+        setIsRecording(false);
+      } catch (error) {
+        console.error("Failed to stop recording:", error);
+      }
+    }
+  };
+
+  useEffect(() => {
+    // Prevent duplicate initialization.
+    if (initRef.current) return;
+    initRef.current = true;
+
+    const initAudioHandler = async () => {
+      const handler = new AudioHandler();
+      await handler.initialize();
+      audioHandlerRef.current = handler;
+      await handleConnect();
+      await toggleRecording();
+    };
+
+    initAudioHandler().catch((error) => {
+      // `status` is rendered as a React child, so it must be a string;
+      // storing the raw Error object would make the render throw.
+      setStatus(error instanceof Error ? error.message : String(error));
+      console.error(error);
+    });
+
+    return () => {
+      // This cleanup closes over the first render, where the `isRecording`
+      // state is always false, so consult the ref instead.
+      if (isRecordingRef.current) {
+        try {
+          audioHandlerRef.current?.stopRecording();
+        } catch (error) {
+          console.error("Failed to stop recording:", error);
+        }
+        isRecordingRef.current = false;
+      }
+      audioHandlerRef.current?.close().catch(console.error);
+      void disconnect();
+    };
+  }, []);
+
+  // Polls frequency data once per animation frame while recording.
+  useEffect(() => {
+    let animationFrameId: number;
+
+    if (isConnected && isRecording) {
+      const animationFrame = () => {
+        if (audioHandlerRef.current) {
+          const freqData = audioHandlerRef.current.getByteFrequencyData();
+          setFrequencies(freqData);
+        }
+        animationFrameId = requestAnimationFrame(animationFrame);
+      };
+
+      animationFrameId = requestAnimationFrame(animationFrame);
+    } else {
+      setFrequencies(undefined);
+    }
+
+    return () => {
+      if (animationFrameId) {
+        cancelAnimationFrame(animationFrameId);
+      }
+    };
+  }, [isConnected, isRecording]);
+
+  // update session params
+  useEffect(() => {
+    clientRef.current?.configure({ voice }).catch(console.error);
+  }, [voice]);
+  useEffect(() => {
+    clientRef.current?.configure({ temperature }).catch(console.error);
+  }, [temperature]);
+
+  const handleClose = async () => {
+    onClose?.();
+    if (isRecording) {
+      await toggleRecording();
+    }
+    disconnect().catch(console.error);
+  };
+
+  return (
+    <div className={styles["realtime-chat"]}>
+      <div
+        className={clsx(styles["circle-mic"], {
+          [styles["pulse"]]: isRecording,
+        })}
+      >
+        <VoicePrint frequencies={frequencies} isActive={isRecording} />
+      </div>
+
+      <div className={styles["bottom-icons"]}>
+        <div>
+          <IconButton
+            icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
+            onClick={toggleRecording}
+            disabled={!isConnected}
+            shadow
+            bordered
+          />
+        </div>
+        <div className={styles["icon-center"]}>{status}</div>
+        <div>
+          <IconButton
+            icon={<PowerIcon />}
+            onClick={handleClose}
+            shadow
+            bordered
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
--- a/app/components/realtime-chat/realtime-config.tsx
+++ b/app/components/realtime-chat/realtime-config.tsx
@ -0,0 +1,173 @@
+import { RealtimeConfig } from "@/app/store";
+
+import Locale from "@/app/locales";
+import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib";
+
+import { InputRange } from "@/app/components/input-range";
+import { Voice } from "rt-client";
+import { ServiceProvider } from "@/app/constant";
+
+const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure];
+
+const models = ["gpt-4o-realtime-preview-2024-10-01"];
+
+const voice = ["alloy", "shimmer", "echo"];
+
+/**
+ * Settings rows for the realtime chat feature: enable switch, provider,
+ * model, API key, Azure endpoint/deployment (Azure provider only), voice and
+ * temperature.
+ *
+ * @param props.realtimeConfig Current realtime configuration to display.
+ * @param props.updateConfig Receives an updater that edits a config draft.
+ *   The draft may be only a shallow copy of the stored config, so nested
+ *   objects are replaced here instead of being mutated in place.
+ */
+export function RealtimeConfigList(props: {
+  realtimeConfig: RealtimeConfig;
+  updateConfig: (updater: (config: RealtimeConfig) => void) => void;
+}) {
+  const azureConfigComponent = props.realtimeConfig.provider ===
+    ServiceProvider.Azure && (
+    <>
+      <ListItem
+        title={Locale.Settings.Realtime.Azure.Endpoint.Title}
+        subTitle={Locale.Settings.Realtime.Azure.Endpoint.SubTitle}
+      >
+        <input
+          // Fall back to "" so the input stays controlled when unset.
+          value={props.realtimeConfig?.azure?.endpoint ?? ""}
+          type="text"
+          placeholder={Locale.Settings.Realtime.Azure.Endpoint.Title}
+          onChange={(e) => {
+            const endpoint = e.currentTarget.value;
+            props.updateConfig((config) => {
+              // Replace `azure` rather than writing into it: the nested
+              // object may still be shared with the stored config.
+              config.azure = { ...config.azure, endpoint };
+            });
+          }}
+        />
+      </ListItem>
+      <ListItem
+        title={Locale.Settings.Realtime.Azure.Deployment.Title}
+        subTitle={Locale.Settings.Realtime.Azure.Deployment.SubTitle}
+      >
+        <input
+          value={props.realtimeConfig?.azure?.deployment ?? ""}
+          type="text"
+          placeholder={Locale.Settings.Realtime.Azure.Deployment.Title}
+          onChange={(e) => {
+            const deployment = e.currentTarget.value;
+            props.updateConfig((config) => {
+              config.azure = { ...config.azure, deployment };
+            });
+          }}
+        />
+      </ListItem>
+    </>
+  );
+
+  return (
+    <>
+      <ListItem
+        title={Locale.Settings.Realtime.Enable.Title}
+        subTitle={Locale.Settings.Realtime.Enable.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={props.realtimeConfig.enable}
+          onChange={(e) =>
+            props.updateConfig(
+              (config) => (config.enable = e.currentTarget.checked),
+            )
+          }
+        ></input>
+      </ListItem>
+
+      {/* Everything below is shown only while the feature is enabled. */}
+      {props.realtimeConfig.enable && (
+        <>
+          <ListItem
+            title={Locale.Settings.Realtime.Provider.Title}
+            subTitle={Locale.Settings.Realtime.Provider.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.Realtime.Provider.Title}
+              value={props.realtimeConfig.provider}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.provider = e.target.value as ServiceProvider),
+                );
+              }}
+            >
+              {providers.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.Model.Title}
+            subTitle={Locale.Settings.Realtime.Model.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.Realtime.Model.Title}
+              value={props.realtimeConfig.model}
+              onChange={(e) => {
+                props.updateConfig((config) => (config.model = e.target.value));
+              }}
+            >
+              {models.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.ApiKey.Title}
+            subTitle={Locale.Settings.Realtime.ApiKey.SubTitle}
+          >
+            <PasswordInput
+              aria={Locale.Settings.ShowPassword}
+              aria-label={Locale.Settings.Realtime.ApiKey.Title}
+              value={props.realtimeConfig.apiKey}
+              type="text"
+              placeholder={Locale.Settings.Realtime.ApiKey.Placeholder}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) => (config.apiKey = e.currentTarget.value),
+                );
+              }}
+            />
+          </ListItem>
+          {azureConfigComponent}
+          <ListItem
+            title={Locale.Settings.TTS.Voice.Title}
+            subTitle={Locale.Settings.TTS.Voice.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.TTS.Voice.Title}
+              value={props.realtimeConfig.voice}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) => (config.voice = e.currentTarget.value as Voice),
+                );
+              }}
+            >
+              {voice.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.Temperature.Title}
+            subTitle={Locale.Settings.Realtime.Temperature.SubTitle}
+          >
+            <InputRange
+              aria={Locale.Settings.Temperature.Title}
+              value={props.realtimeConfig?.temperature?.toFixed(1)}
+              min="0.6"
+              max="1"
+              step="0.1"
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.temperature = e.currentTarget.valueAsNumber),
+                );
+              }}
+            ></InputRange>
+          </ListItem>
+        </>
+      )}
+    </>
+  );
+}
--- a/app/components/settings.tsx
+++ b/app/components/settings.tsx
@ -85,6 +85,7 @@ import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
 import { TTSConfigList } from "./tts-config";
+import { RealtimeConfigList } from "./realtime-chat/realtime-config";

 function EditPromptModal(props: { id: string; onClose: () => void }) {
  const promptStore = usePromptStore();
@ -1799,7 +1800,18 @@ export function Settings() {
        {shouldShowPromptModal && (
          <UserPromptModal onClose={() => setShowPromptModal(false)} />
        )}
-
+        <List>
+          <RealtimeConfigList
+            realtimeConfig={config.realtimeConfig}
+            updateConfig={(updater) => {
+              const realtimeConfig = { ...config.realtimeConfig };
+              updater(realtimeConfig);
+              config.update(
+                (config) => (config.realtimeConfig = realtimeConfig),
+              );
+            }}
+          />
+        </List>
        <List>
          <TTSConfigList
            ttsConfig={config.ttsConfig}
--- a/app/components/voice-print/index.ts
+++ b/app/components/voice-print/index.ts
@ -0,0 +1 @@
+export * from "./voice-print";
--- a/app/components/voice-print/voice-print.module.scss
+++ b/app/components/voice-print/voice-print.module.scss
@ -0,0 +1,11 @@
+.voice-print { // full-width, 60px tall strip holding the waveform canvas
+  width: 100%;
+  height: 60px;
+  margin: 20px 0;
+
+  canvas {
+    width: 100%;
+    height: 100%;
+    filter: brightness(1.2); // boost overall brightness
+  }
+}
--- a/app/components/voice-print/voice-print.tsx
+++ b/app/components/voice-print/voice-print.tsx
@ -0,0 +1,180 @@
+import { useEffect, useRef, useCallback } from "react";
+import styles from "./voice-print.module.scss";
+
+interface VoicePrintProps {
+  frequencies?: Uint8Array;
+  isActive?: boolean;
+}
+
+/**
+ * Draws a mirrored, smoothed waveform of the given frequency data on a canvas.
+ *
+ * @param frequencies Byte frequency data (0-255 per bin); nothing is drawn
+ *   when it is missing or has fewer than two bins.
+ * @param isActive When falsy the canvas is cleared and the smoothing history
+ *   is reset.
+ */
+export function VoicePrint({ frequencies, isActive }: VoicePrintProps) {
+  // Canvas element used to obtain the 2D drawing context.
+  const canvasRef = useRef<HTMLCanvasElement>(null);
+  // Recent frequency frames, used for smoothing.
+  const historyRef = useRef<number[][]>([]);
+  // Number of history frames to keep; more frames give a smoother curve.
+  const historyLengthRef = useRef(10);
+  // Pending animation frame id, kept so it can be cancelled on cleanup.
+  const animationFrameRef = useRef<number>();
+
+  /**
+   * Appends a frame to the frequency history.
+   * The history is a FIFO queue capped at `historyLengthRef.current` frames.
+   */
+  const updateHistory = useCallback((freqArray: number[]) => {
+    historyRef.current.push(freqArray);
+    if (historyRef.current.length > historyLengthRef.current) {
+      historyRef.current.shift();
+    }
+  }, []);
+
+  useEffect(() => {
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    /**
+     * High-DPI handling: size the backing store in device pixels, then map
+     * CSS pixels onto it. All geometry below is expressed in CSS pixels
+     * (`width` x `height`); using `canvas.width`/`canvas.height` there would
+     * apply the device pixel ratio twice and push the drawing off-canvas.
+     */
+    const dpr = window.devicePixelRatio || 1;
+    const width = canvas.offsetWidth;
+    const height = canvas.offsetHeight;
+    canvas.width = width * dpr;
+    canvas.height = height * dpr;
+    // setTransform (not scale) so the mapping is absolute, never cumulative.
+    ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
+
+    /**
+     * Main draw routine, re-scheduled through requestAnimationFrame:
+     * 1. clear the canvas
+     * 2. update the history
+     * 3. compute the waveform points
+     * 4. draw the vertically mirrored shape
+     */
+    const draw = () => {
+      // Clear the canvas.
+      ctx.clearRect(0, 0, width, height);
+
+      if (!frequencies || !isActive) {
+        historyRef.current = [];
+        return;
+      }
+
+      // A curve needs at least two bins; this also keeps the slice width
+      // below from dividing by zero.
+      if (frequencies.length < 2) return;
+
+      const freqArray = Array.from(frequencies);
+      updateHistory(freqArray);
+
+      // Waveform geometry.
+      const points: [number, number][] = [];
+      const centerY = height / 2;
+      const sliceWidth = width / (frequencies.length - 1);
+
+      // Upper half of the shape.
+      ctx.beginPath();
+      ctx.moveTo(0, centerY);
+
+      /**
+       * Drawing approach:
+       * 1. average against the history for smooth transitions
+       * 2. modulate with a sine term for a natural-looking ripple
+       * 3. join points with quadratic curves
+       * 4. mirror the result to complete the shape
+       */
+      for (let i = 0; i < frequencies.length; i++) {
+        const x = i * sliceWidth;
+        let avgFrequency = frequencies[i];
+
+        /**
+         * Smoothing: average the current value with the values stored for
+         * the same bin in the history (which already includes this frame).
+         */
+        if (historyRef.current.length > 0) {
+          const historicalValues = historyRef.current.map((h) => h[i] || 0);
+          avgFrequency =
+            (avgFrequency + historicalValues.reduce((a, b) => a + b, 0)) /
+            (historyRef.current.length + 1);
+        }
+
+        /**
+         * Transform: normalize to 0-1, scale to half the canvas height, and
+         * apply a sine factor that varies with bin index and time.
+         */
+        const normalized = avgFrequency / 255.0;
+        const amplitude = normalized * (height / 2);
+        const y = centerY + amplitude * Math.sin(i * 0.2 + Date.now() * 0.002);
+
+        points.push([x, y]);
+
+        if (i === 0) {
+          ctx.moveTo(x, y);
+        } else {
+          // Curve through the previous point toward the midpoint for a
+          // smoother outline.
+          const prevPoint = points[i - 1];
+          const midX = (prevPoint[0] + x) / 2;
+          ctx.quadraticCurveTo(
+            prevPoint[0],
+            prevPoint[1],
+            midX,
+            (prevPoint[1] + y) / 2,
+          );
+        }
+      }
+
+      // Mirrored lower half, traced right to left.
+      for (let i = points.length - 1; i >= 0; i--) {
+        const [x, y] = points[i];
+        const symmetricY = centerY - (y - centerY);
+        if (i === points.length - 1) {
+          ctx.lineTo(x, symmetricY);
+        } else {
+          const nextPoint = points[i + 1];
+          const midX = (nextPoint[0] + x) / 2;
+          ctx.quadraticCurveTo(
+            nextPoint[0],
+            centerY - (nextPoint[1] - centerY),
+            midX,
+            centerY - ((nextPoint[1] + y) / 2 - centerY),
+          );
+        }
+      }
+
+      ctx.closePath();
+
+      /**
+       * Fill: a left-to-right, three-stop, semi-transparent blue gradient.
+       */
+      const gradient = ctx.createLinearGradient(0, 0, width, 0);
+      gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
+      gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
+      gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");
+
+      ctx.fillStyle = gradient;
+      ctx.fill();
+
+      animationFrameRef.current = requestAnimationFrame(draw);
+    };
+
+    // Start the animation loop.
+    draw();
+
+    // Cleanup: cancel the pending frame when the effect re-runs or unmounts.
+    return () => {
+      if (animationFrameRef.current) {
+        cancelAnimationFrame(animationFrameRef.current);
+      }
+    };
+  }, [frequencies, isActive, updateHistory]);
+
+  return (
+    <div className={styles["voice-print"]}>
+      <canvas ref={canvasRef} />
+    </div>
+  );
+}
--- a/app/icons/headphone.svg
+++ b/app/icons/headphone.svg
@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="16" height="16" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path d="M4 28C4 26.8954 4.89543 26 6 26H10V38H6C4.89543 38 4 37.1046 4 36V28Z" fill="none" />
+    <path d="M38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
+        fill="none" />
+    <path
+        d="M10 36V24C10 16.268 16.268 10 24 10C31.732 10 38 16.268 38 24V36M10 26H6C4.89543 26 4 26.8954 4 28V36C4 37.1046 4.89543 38 6 38H10V26ZM38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
+        stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M16 32H20L22 26L26 38L28 32H32" stroke="#333" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
--- a/app/icons/power.svg
+++ b/app/icons/power.svg
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path
+        d="M14.5 8C13.8406 8.37652 13.2062 8.79103 12.6 9.24051C11.5625 10.0097 10.6074 10.8814 9.75 11.8402C6.79377 15.1463 5 19.4891 5 24.2455C5 34.6033 13.5066 43 24 43C34.4934 43 43 34.6033 43 24.2455C43 19.4891 41.2062 15.1463 38.25 11.8402C37.3926 10.8814 36.4375 10.0097 35.4 9.24051C34.7938 8.79103 34.1594 8.37652 33.5 8"
+        stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 4V24" stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+</svg>
--- a/app/icons/voice-off.svg
+++ b/app/icons/voice-off.svg
@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path
+        d="M31 24V11C31 7.13401 27.866 4 24 4C20.134 4 17 7.13401 17 11V24C17 27.866 20.134 31 24 31C27.866 31 31 27.866 31 24Z"
+        stroke="#d0021b" stroke-width="4" stroke-linejoin="round" />
+    <path
+        d="M9 23C9 31.2843 15.7157 38 24 38C25.7532 38 27.4361 37.6992 29 37.1465M39 23C39 25.1333 38.5547 27.1626 37.7519 29"
+        stroke="#d0021b" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 38V44" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+    <path d="M42 42L6 6" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
--- a/app/icons/voice.svg
+++ b/app/icons/voice.svg
@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <rect x="17" y="4" width="14" height="27" rx="7" fill="none" stroke="#333" stroke-width="4"
+        stroke-linejoin="round" />
+    <path d="M9 23C9 31.2843 15.7157 38 24 38C32.2843 38 39 31.2843 39 23" stroke="#333"
+        stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 38V44" stroke="#333" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
--- a/app/lib/audio.ts
+++ b/app/lib/audio.ts
@ -0,0 +1,258 @@
+/**
+ * Web Audio helper for realtime chat: captures microphone audio as 16-bit
+ * PCM chunks, schedules streamed PCM chunks for playback one after another,
+ * and keeps a copy of both directions so they can be exported as WAV blobs.
+ * Microphone and playback signals are both routed into `analyser`.
+ */
+export class AudioHandler {
+  private context: AudioContext;
+  private mergeNode: ChannelMergerNode;
+  private analyserData: Uint8Array;
+  public analyser: AnalyserNode;
+  private workletNode: AudioWorkletNode | null = null;
+  private stream: MediaStream | null = null;
+  private source: MediaStreamAudioSourceNode | null = null;
+  // Recorded 16-bit samples, stored flat (one array element per sample).
+  private recordBuffer: number[] = [];
+  private readonly sampleRate = 24000;
+
+  private nextPlayTime: number = 0;
+  private isPlaying: boolean = false;
+  private playbackQueue: AudioBufferSourceNode[] = [];
+  // Played 16-bit samples, stored flat (one array element per sample).
+  private playBuffer: number[] = [];
+
+  constructor() {
+    this.context = new AudioContext({ sampleRate: this.sampleRate });
+    // The microphone feeds merger input 0 and playback feeds input 1, so the
+    // analyser observes both signals.
+    this.mergeNode = new ChannelMergerNode(this.context, { numberOfInputs: 2 });
+    this.analyser = new AnalyserNode(this.context, { fftSize: 256 });
+    this.analyserData = new Uint8Array(this.analyser.frequencyBinCount);
+    this.mergeNode.connect(this.analyser);
+  }
+
+  /** Refreshes and returns the frequency data; the same array is reused on every call. */
+  getByteFrequencyData() {
+    this.analyser.getByteFrequencyData(this.analyserData);
+    return this.analyserData;
+  }
+
+  /** Loads the recorder worklet module into the audio context. */
+  async initialize() {
+    await this.context.audioWorklet.addModule("/audio-processor.js");
+  }
+
+  /**
+   * Opens the microphone and starts forwarding captured audio.
+   *
+   * @param onChunk receives each captured batch as the raw bytes of 16-bit PCM samples
+   * @throws rethrows any failure raised while setting up the capture, after logging it
+   */
+  async startRecording(onChunk: (chunk: Uint8Array) => void) {
+    try {
+      if (!this.workletNode) {
+        await this.initialize();
+      }
+
+      this.stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          channelCount: 1,
+          sampleRate: this.sampleRate,
+          echoCancellation: true,
+          noiseSuppression: true,
+        },
+      });
+
+      await this.context.resume();
+      this.source = this.context.createMediaStreamSource(this.stream);
+      this.workletNode = new AudioWorkletNode(
+        this.context,
+        "audio-recorder-processor",
+      );
+
+      this.workletNode.port.onmessage = (event) => {
+        if (event.data.eventType !== "audio") return;
+
+        const float32Data: Float32Array = event.data.audioData;
+        const int16Data = new Int16Array(float32Data.length);
+        for (let i = 0; i < float32Data.length; i++) {
+          // Clamp to [-1, 1], then scale to the signed 16-bit range.
+          const s = Math.max(-1, Math.min(1, float32Data[i]));
+          int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
+        }
+
+        onChunk(new Uint8Array(int16Data.buffer));
+        // Keep a copy for saveRecordFile(). A plain loop is used rather than
+        // push.apply so the batch size is not limited by the argument count.
+        for (let i = 0; i < int16Data.length; i++) {
+          this.recordBuffer.push(int16Data[i]);
+        }
+      };
+
+      this.source.connect(this.workletNode);
+      this.source.connect(this.mergeNode, 0, 0);
+      this.workletNode.connect(this.context.destination);
+
+      this.workletNode.port.postMessage({ command: "START_RECORDING" });
+    } catch (error) {
+      console.error("Error starting recording:", error);
+      throw error;
+    }
+  }
+
+  /**
+   * Stops the recorder worklet, disconnects the capture nodes and stops the stream's tracks.
+   *
+   * @throws Error if recording was never started
+   */
+  stopRecording() {
+    if (!this.workletNode || !this.source || !this.stream) {
+      throw new Error("Recording not started");
+    }
+
+    this.workletNode.port.postMessage({ command: "STOP_RECORDING" });
+
+    this.workletNode.disconnect();
+    this.source.disconnect();
+    this.stream.getTracks().forEach((track) => track.stop());
+  }
+
+  /** Enables playChunk() and resets the playback schedule to the current context time. */
+  startStreamingPlayback() {
+    this.isPlaying = true;
+    this.nextPlayTime = this.context.currentTime;
+  }
+
+  /** Disables playChunk(), stops every queued source and discards the saved playback samples. */
+  stopStreamingPlayback() {
+    this.isPlaying = false;
+    this.playbackQueue.forEach((source) => source.stop());
+    this.playbackQueue = [];
+    this.playBuffer = [];
+  }
+
+  /**
+   * Schedules one chunk of 16-bit PCM for playback right after the previous chunk.
+   * Does nothing unless streaming playback is active and the chunk holds samples.
+   *
+   * @param chunk raw bytes of 16-bit PCM samples
+   */
+  playChunk(chunk: Uint8Array) {
+    if (!this.isPlaying) return;
+
+    // Copy exactly the bytes this view covers. Wrapping `chunk.buffer` directly
+    // would ignore byteOffset/byteLength when the chunk is a window into a
+    // larger buffer.
+    const byteEnd = chunk.byteOffset + chunk.byteLength;
+    const int16Data = new Int16Array(
+      chunk.buffer.slice(chunk.byteOffset, byteEnd),
+    );
+    // createBuffer() rejects zero-length buffers, so there is nothing to schedule.
+    if (int16Data.length === 0) return;
+
+    const float32Data = new Float32Array(int16Data.length);
+    for (let i = 0; i < int16Data.length; i++) {
+      const sample = int16Data[i];
+      this.playBuffer.push(sample); // keep a copy for savePlayFile()
+      float32Data[i] = sample / (sample < 0 ? 0x8000 : 0x7fff);
+    }
+
+    const audioBuffer = this.context.createBuffer(
+      1,
+      float32Data.length,
+      this.sampleRate,
+    );
+    audioBuffer.getChannelData(0).set(float32Data);
+
+    const source = this.context.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(this.context.destination);
+    source.connect(this.mergeNode, 0, 1);
+
+    // Clamp before starting: a start time in the past plays immediately, so
+    // the next slot must be derived from the real start or chunks overlap.
+    const startTime = Math.max(this.nextPlayTime, this.context.currentTime);
+    source.start(startTime);
+
+    this.playbackQueue.push(source);
+    source.onended = () => {
+      const index = this.playbackQueue.indexOf(source);
+      if (index > -1) {
+        this.playbackQueue.splice(index, 1);
+      }
+    };
+
+    this.nextPlayTime = startTime + audioBuffer.length / this.sampleRate;
+  }
+
+  /**
+   * Wraps mono PCM samples in a 44-byte RIFF/WAVE header.
+   *
+   * @param data samples to store
+   * @param bytesPerSample bits per sample written to the header (despite the name); defaults to 16
+   * @returns a Blob holding the header followed by the sample bytes
+   */
+  _saveData(data: Int16Array, bytesPerSample = 16): Blob {
+    const headerLength = 44;
+    const numberOfChannels = 1;
+    // Use the view's own length so a view into a larger buffer is sized correctly.
+    const byteLength = data.byteLength;
+    const blockAlign = numberOfChannels * (bytesPerSample / 8);
+    const header = new Uint8Array(headerLength);
+    const view = new DataView(header.buffer);
+    view.setUint32(0, 1380533830, false); // RIFF identifier 'RIFF'
+    view.setUint32(4, 36 + byteLength, true); // file length minus the 8-byte RIFF chunk header
+    view.setUint32(8, 1463899717, false); // RIFF type 'WAVE'
+    view.setUint32(12, 1718449184, false); // format chunk identifier 'fmt '
+    view.setUint32(16, 16, true); // format chunk length
+    view.setUint16(20, 1, true); // sample format (raw)
+    view.setUint16(22, numberOfChannels, true); // channel count
+    view.setUint32(24, this.sampleRate, true); // sample rate
+    view.setUint32(28, this.sampleRate * blockAlign, true); // byte rate (sample rate * block align)
+    view.setUint16(32, blockAlign, true); // block align (channel count * bytes per sample)
+    view.setUint16(34, bytesPerSample, true); // bits per sample
+    view.setUint32(36, 1684108385, false); // data chunk identifier 'data'
+    view.setUint32(40, byteLength, true); // data chunk length
+
+    // NOTE(review): the payload is WAV, yet the blob is labelled "audio/mpeg";
+    // left unchanged in case consumers rely on that label — confirm.
+    return new Blob([view, data], { type: "audio/mpeg" });
+  }
+
+  /** Exports the playback samples currently held in the copy buffer as a WAV blob. */
+  savePlayFile() {
+    return this._saveData(new Int16Array(this.playBuffer));
+  }
+
+  /**
+   * Exports a slice of the recorded samples as a WAV blob.
+   *
+   * @param audioStartMillis slice start in ms; the start of the recording when undefined or 0
+   * @param audioEndMillis slice end in ms; the end of the recording when undefined or 0
+   */
+  saveRecordFile(
+    audioStartMillis: number | undefined,
+    audioEndMillis: number | undefined,
+  ) {
+    const startIndex = audioStartMillis
+      ? Math.floor((audioStartMillis * this.sampleRate) / 1000)
+      : 0;
+    const endIndex = audioEndMillis
+      ? Math.floor((audioEndMillis * this.sampleRate) / 1000)
+      : this.recordBuffer.length;
+    return this._saveData(
+      new Int16Array(this.recordBuffer.slice(startIndex, endIndex)),
+    );
+  }
+
+  /** Drops saved audio, stops playback and capture, and closes the audio context. */
+  async close() {
+    this.recordBuffer = [];
+    this.stopStreamingPlayback();
+    this.workletNode?.disconnect();
+    this.source?.disconnect();
+    this.stream?.getTracks().forEach((track) => track.stop());
+    await this.context.close();
+  }
+}
--- a/app/locales/cn.ts
+++ b/app/locales/cn.ts
@ -562,6 +562,39 @@ const cn = {
        SubTitle: "生成语音的速度",
      },
    },
+    Realtime: {
+      Enable: {
+        Title: "实时聊天",
+        SubTitle: "开启实时聊天功能",
+      },
+      Provider: {
+        Title: "模型服务商",
+        SubTitle: "切换不同的服务商",
+      },
+      Model: {
+        Title: "模型",
+        SubTitle: "选择一个模型",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "接口地址",
+          SubTitle: "接口地址",
+        },
+        Deployment: {
+          Title: "部署名称",
+          SubTitle: "部署名称",
+        },
+      },
+      Temperature: {
+        Title: "随机性 (temperature)",
+        SubTitle: "值越大，回复越随机",
+      },
+    },
  },
  Store: {
    DefaultTopic: "新的聊天",
--- a/app/locales/en.ts
+++ b/app/locales/en.ts
@ -570,6 +570,39 @@ const en: LocaleType = {
      },
      Engine: "TTS Engine",
    },
+    Realtime: {
+      Enable: {
+        Title: "Realtime Chat",
+        SubTitle: "Enable realtime chat feature",
+      },
+      Provider: {
+        Title: "Model Provider",
+        SubTitle: "Switch between different providers",
+      },
+      Model: {
+        Title: "Model",
+        SubTitle: "Select a model",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "Endpoint",
+          SubTitle: "Endpoint",
+        },
+        Deployment: {
+          Title: "Deployment Name",
+          SubTitle: "Deployment Name",
+        },
+      },
+      Temperature: {
+        Title: "Randomness (temperature)",
+        SubTitle: "Higher values result in more random responses",
+      },
+    },
  },
  Store: {
    DefaultTopic: "New Conversation",
--- a/app/store/chat.ts
+++ b/app/store/chat.ts
@ -52,6 +52,7 @@ export type ChatMessage = RequestMessage & {
  id: string;
  model?: ModelType;
  tools?: ChatMessageTool[];
+  audio_url?: string;
 };

 export function createMessage(override: Partial<ChatMessage>): ChatMessage {
--- a/app/store/config.ts
+++ b/app/store/config.ts
@ -15,6 +15,7 @@ import {
  ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";
+import type { Voice } from "rt-client";

 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
 export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
@ -90,12 +91,26 @@ export const DEFAULT_CONFIG = {
    voice: DEFAULT_TTS_VOICE,
    speed: 1.0,
  },
+
+  realtimeConfig: {
+    enable: false,
+    provider: "OpenAI" as ServiceProvider,
+    model: "gpt-4o-realtime-preview-2024-10-01",
+    apiKey: "",
+    azure: {
+      endpoint: "",
+      deployment: "",
+    },
+    temperature: 0.9,
+    voice: "alloy" as Voice,
+  },
 };

 export type ChatConfig = typeof DEFAULT_CONFIG;

 export type ModelConfig = ChatConfig["modelConfig"];
 export type TTSConfig = ChatConfig["ttsConfig"];
+export type RealtimeConfig = ChatConfig["realtimeConfig"];

 export function limitNumber(
  x: number,
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@ -138,7 +138,7 @@ export function uploadImage(file: Blob): Promise<string> {
  })
    .then((res) => res.json())
    .then((res) => {
-      console.log("res", res);
+      // console.log("res", res);
      if (res?.code == 0 && res?.data) {
        return res?.data;
      }
--- a/package.json
+++ b/package.json
@ -52,7 +52,8 @@
    "sass": "^1.59.2",
    "spark-md5": "^3.0.2",
    "use-debounce": "^9.0.4",
-    "zustand": "^4.3.8"
+    "zustand": "^4.3.8",
+    "rt-client": "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz"
  },
  "devDependencies": {
    "@tauri-apps/api": "^1.6.0",
--- a/public/audio-processor.js
+++ b/public/audio-processor.js
@ -0,0 +1,48 @@
+// @ts-nocheck
+class AudioRecorderProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.isRecording = false;
+    this.bufferSize = 2400; // 100ms at 24kHz
+    this.currentBuffer = [];
+
+    this.port.onmessage = (event) => {
+      if (event.data.command === "START_RECORDING") {
+        this.isRecording = true;
+      } else if (event.data.command === "STOP_RECORDING") {
+        this.isRecording = false;
+
+        if (this.currentBuffer.length > 0) {
+          this.sendBuffer();
+        }
+      }
+    };
+  }
+
+  sendBuffer() {
+    if (this.currentBuffer.length > 0) {
+      const audioData = new Float32Array(this.currentBuffer);
+      this.port.postMessage({
+        eventType: "audio",
+        audioData: audioData,
+      });
+      this.currentBuffer = [];
+    }
+  }
+
+  process(inputs) {
+    const input = inputs[0];
+    if (input.length > 0 && this.isRecording) {
+      const audioData = input[0];
+
+      this.currentBuffer.push(...audioData);
+
+      if (this.currentBuffer.length >= this.bufferSize) {
+        this.sendBuffer();
+      }
+    }
+    return true;
+  }
+}
+
+registerProcessor("audio-recorder-processor", AudioRecorderProcessor);
--- a/yarn.lock
+++ b/yarn.lock
@ -7455,6 +7455,12 @@ robust-predicates@^3.0.0:
  resolved "https://registry.npmmirror.com/robust-predicates/-/robust-predicates-3.0.1.tgz#ecde075044f7f30118682bd9fb3f123109577f9a"
  integrity sha512-ndEIpszUHiG4HtDsQLeIuMvRsDnn8c8rYStabochtUeCvfuvNptb5TUbVD68LRAILPX7p9nqQGh4xJgn3EHS/g==

+"rt-client@https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz":
+  version "0.5.0"
+  resolved "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz#abf2e9a850201e3571b8d36830f77bc52af3de9b"
+  dependencies:
+    ws "^8.18.0"
+
 run-parallel@^1.1.9:
  version "1.2.0"
  resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
@ -8498,9 +8504,9 @@ write-file-atomic@^4.0.2:
    imurmurhash "^0.1.4"
    signal-exit "^3.0.7"

-ws@^8.11.0:
+ws@^8.11.0, ws@^8.18.0:
  version "8.18.0"
-  resolved "https://registry.npmmirror.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
  integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==

 xml-name-validator@^4.0.0: