Mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
Merge pull request #5786 from ConnectAI-E/feature/realtime-chat

Feature/realtime chat

Commit 289aeec8af

@@ -96,10 +96,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: Easily preview, copy and share generated content/webpages through a separate window [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
-- [x] Plugins: support network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] Supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - [ ] local knowledge base

 ## What's New

+- 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 Now supports Artifacts & SD

@@ -134,10 +135,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: 通过独立窗口,轻松预览、复制和分享生成的内容/可交互网页 [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
-- [x] 插件机制,支持`联网搜索`、`计算器`、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] 支持联网搜索、计算器、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] 支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - [ ] 本地知识库

 ## 最新动态

+- 🚀 v2.15.8 现在支持Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 客户端支持Tauri本地直接调用大模型API,更安全
 - 🚀 v2.15.0 现在支持插件功能了!了解更多:[NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 现在支持 Artifacts & SD 了。

@@ -45,6 +45,14 @@
 .chat-input-actions {
   display: flex;
+  flex-wrap: wrap;
   justify-content: space-between;
+  gap: 5px;
+
+  &-end {
+    display: flex;
+    margin-left: auto;
+    gap: 5px;
+  }

   .chat-input-action {
     display: inline-flex;

@@ -62,10 +70,6 @@
     width: var(--icon-width);
     overflow: hidden;

-    &:not(:last-child) {
-      margin-right: 5px;
-    }
-
     .text {
       white-space: nowrap;
       padding-left: 5px;

@@ -231,10 +235,12 @@
   animation: slide-in ease 0.3s;

-  $linear: linear-gradient(to right,
-      rgba(0, 0, 0, 0),
-      rgba(0, 0, 0, 1),
-      rgba(0, 0, 0, 0));
+  $linear: linear-gradient(
+    to right,
+    rgba(0, 0, 0, 0),
+    rgba(0, 0, 0, 1),
+    rgba(0, 0, 0, 0)
+  );
   mask-image: $linear;

   @mixin show {

@@ -373,7 +379,7 @@
   }
 }

-.chat-message-user>.chat-message-container {
+.chat-message-user > .chat-message-container {
   align-items: flex-end;
 }

@@ -443,6 +449,25 @@
   transition: all ease 0.3s;
 }

+.chat-message-audio {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  border-radius: 10px;
+  background-color: rgba(0, 0, 0, 0.05);
+  border: var(--border-in-light);
+  position: relative;
+  transition: all ease 0.3s;
+  margin-top: 10px;
+  font-size: 14px;
+  user-select: text;
+  word-break: break-word;
+  box-sizing: border-box;
+  audio {
+    height: 30px; /* adjust height */
+  }
+}
+
 .chat-message-item-image {
   width: 100%;
   margin-top: 10px;

@@ -471,9 +496,8 @@
   border: rgba($color: #888, $alpha: 0.2) 1px solid;
 }

-
 @media only screen and (max-width: 600px) {
-  $calc-image-width: calc(100vw/3*2/var(--image-count));
+  $calc-image-width: calc(100vw / 3 * 2 / var(--image-count));

   .chat-message-item-image-multi {
     width: $calc-image-width;

@@ -481,13 +505,18 @@
   }

   .chat-message-item-image {
-    max-width: calc(100vw/3*2);
+    max-width: calc(100vw / 3 * 2);
   }
 }

 @media screen and (min-width: 600px) {
-  $max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count));
-  $image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count));
+  $max-image-width: calc(
+    calc(1200px - var(--sidebar-width)) / 3 * 2 / var(--image-count)
+  );
+  $image-width: calc(
+    calc(var(--window-width) - var(--sidebar-width)) / 3 * 2 /
+      var(--image-count)
+  );

   .chat-message-item-image-multi {
     width: $image-width;

@@ -497,7 +526,7 @@
   }

   .chat-message-item-image {
-    max-width: calc(calc(1200px - var(--sidebar-width))/3*2);
+    max-width: calc(calc(1200px - var(--sidebar-width)) / 3 * 2);
   }
 }

@@ -515,7 +544,7 @@
   z-index: 1;
 }

-.chat-message-user>.chat-message-container>.chat-message-item {
+.chat-message-user > .chat-message-container > .chat-message-item {
   background-color: var(--second);

   &:hover {

@@ -626,7 +655,8 @@
   min-height: 68px;
 }

-.chat-input:focus {}
+.chat-input:focus {
+}

 .chat-input-send {
   background-color: var(--primary);

@@ -694,3 +724,30 @@
   font-size: 12px;
   color: var(--black);
 }
+
+.chat-main {
+  display: flex;
+  height: 100%;
+  width: 100%;
+  position: relative;
+  overflow: hidden;
+  .chat-body-container {
+    height: 100%;
+    display: flex;
+    flex-direction: column;
+    flex: 1;
+    width: 100%;
+  }
+  .chat-side-panel {
+    position: absolute;
+    inset: 0;
+    background: var(--white);
+    overflow: hidden;
+    z-index: 10;
+    transform: translateX(100%);
+    transition: all ease 0.3s;
+    &-show {
+      transform: translateX(0);
+    }
+  }
+}

File diff suppressed because it is too large.

app/components/realtime-chat/index.ts (new file, 1 line)
@@ -0,0 +1 @@
export * from "./realtime-chat";

app/components/realtime-chat/realtime-chat.module.scss (new file, 74 lines)
@@ -0,0 +1,74 @@
.realtime-chat {
  width: 100%;
  justify-content: center;
  align-items: center;
  position: relative;
  display: flex;
  flex-direction: column;
  height: 100%;
  padding: 20px;
  box-sizing: border-box;
  .circle-mic {
    width: 150px;
    height: 150px;
    border-radius: 50%;
    background: linear-gradient(to bottom right, #a0d8ef, #f0f8ff);
    display: flex;
    justify-content: center;
    align-items: center;
  }
  .icon-center {
    font-size: 24px;
  }

  .bottom-icons {
    display: flex;
    justify-content: space-between;
    align-items: center;
    width: 100%;
    position: absolute;
    bottom: 20px;
    box-sizing: border-box;
    padding: 0 20px;
  }

  .icon-left,
  .icon-right {
    width: 46px;
    height: 46px;
    font-size: 36px;
    background: var(--second);
    border-radius: 50%;
    padding: 2px;
    display: flex;
    justify-content: center;
    align-items: center;
    cursor: pointer;
    &:hover {
      opacity: 0.8;
    }
  }

  &.mobile {
    display: none;
  }
}

.pulse {
  animation: pulse 1.5s infinite;
}

@keyframes pulse {
  0% {
    transform: scale(1);
    opacity: 0.7;
  }
  50% {
    transform: scale(1.1);
    opacity: 1;
  }
  100% {
    transform: scale(1);
    opacity: 0.7;
  }
}

app/components/realtime-chat/realtime-chat.tsx (new file, 359 lines)
@@ -0,0 +1,359 @@
import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import PowerIcon from "@/app/icons/power.svg";

import styles from "./realtime-chat.module.scss";
import clsx from "clsx";

import { useState, useRef, useEffect } from "react";

import { useChatStore, createMessage, useAppConfig } from "@/app/store";

import { IconButton } from "@/app/components/button";

import {
  Modality,
  RTClient,
  RTInputAudioItem,
  RTResponse,
  TurnDetection,
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";
import { VoicePrint } from "@/app/components/voice-print";

interface RealtimeChatProps {
  onClose?: () => void;
  onStartVoice?: () => void;
  onPausedVoice?: () => void;
}

export function RealtimeChat({
  onClose,
  onStartVoice,
  onPausedVoice,
}: RealtimeChatProps) {
  const chatStore = useChatStore();
  const session = chatStore.currentSession();
  const config = useAppConfig();
  const [status, setStatus] = useState("");
  const [isRecording, setIsRecording] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [modality, setModality] = useState("audio");
  const [useVAD, setUseVAD] = useState(true);
  const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();

  const clientRef = useRef<RTClient | null>(null);
  const audioHandlerRef = useRef<AudioHandler | null>(null);
  const initRef = useRef(false);

  const temperature = config.realtimeConfig.temperature;
  const apiKey = config.realtimeConfig.apiKey;
  const model = config.realtimeConfig.model;
  const azure = config.realtimeConfig.provider === "Azure";
  const azureEndpoint = config.realtimeConfig.azure.endpoint;
  const azureDeployment = config.realtimeConfig.azure.deployment;
  const voice = config.realtimeConfig.voice;

  const handleConnect = async () => {
    if (isConnecting) return;
    if (!isConnected) {
      try {
        setIsConnecting(true);
        clientRef.current = azure
          ? new RTClient(
              new URL(azureEndpoint),
              { key: apiKey },
              { deployment: azureDeployment },
            )
          : new RTClient({ key: apiKey }, { model });
        const modalities: Modality[] =
          modality === "audio" ? ["text", "audio"] : ["text"];
        const turnDetection: TurnDetection = useVAD
          ? { type: "server_vad" }
          : null;
        await clientRef.current.configure({
          instructions: "",
          voice,
          input_audio_transcription: { model: "whisper-1" },
          turn_detection: turnDetection,
          tools: [],
          temperature,
          modalities,
        });
        startResponseListener();

        setIsConnected(true);
        // TODO
        // try {
        //   const recentMessages = chatStore.getMessagesWithMemory();
        //   for (const message of recentMessages) {
        //     const { role, content } = message;
        //     if (typeof content === "string") {
        //       await clientRef.current.sendItem({
        //         type: "message",
        //         role: role as any,
        //         content: [
        //           {
        //             type: (role === "assistant" ? "text" : "input_text") as any,
        //             text: content as string,
        //           },
        //         ],
        //       });
        //     }
        //   }
        //   // await clientRef.current.generateResponse();
        // } catch (error) {
        //   console.error("Set message failed:", error);
        // }
      } catch (error) {
        console.error("Connection failed:", error);
        setStatus("Connection failed");
      } finally {
        setIsConnecting(false);
      }
    } else {
      await disconnect();
    }
  };

  const disconnect = async () => {
    if (clientRef.current) {
      try {
        await clientRef.current.close();
        clientRef.current = null;
        setIsConnected(false);
      } catch (error) {
        console.error("Disconnect failed:", error);
      }
    }
  };

  const startResponseListener = async () => {
    if (!clientRef.current) return;

    try {
      for await (const serverEvent of clientRef.current.events()) {
        if (serverEvent.type === "response") {
          await handleResponse(serverEvent);
        } else if (serverEvent.type === "input_audio") {
          await handleInputAudio(serverEvent);
        }
      }
    } catch (error) {
      if (clientRef.current) {
        console.error("Response iteration error:", error);
      }
    }
  };

  const handleResponse = async (response: RTResponse) => {
    for await (const item of response) {
      if (item.type === "message" && item.role === "assistant") {
        const botMessage = createMessage({
          role: item.role,
          content: "",
        });
        // add bot message first
        chatStore.updateTargetSession(session, (session) => {
          session.messages = session.messages.concat([botMessage]);
        });
        let hasAudio = false;
        for await (const content of item) {
          if (content.type === "text") {
            for await (const text of content.textChunks()) {
              botMessage.content += text;
            }
          } else if (content.type === "audio") {
            const textTask = async () => {
              for await (const text of content.transcriptChunks()) {
                botMessage.content += text;
              }
            };
            const audioTask = async () => {
              audioHandlerRef.current?.startStreamingPlayback();
              for await (const audio of content.audioChunks()) {
                hasAudio = true;
                audioHandlerRef.current?.playChunk(audio);
              }
            };
            await Promise.all([textTask(), audioTask()]);
          }
          // update message.content
          chatStore.updateTargetSession(session, (session) => {
            session.messages = session.messages.concat();
          });
        }
        if (hasAudio) {
          // upload audio get audio_url
          const blob = audioHandlerRef.current?.savePlayFile();
          uploadImage(blob!).then((audio_url) => {
            botMessage.audio_url = audio_url;
            // update text and audio_url
            chatStore.updateTargetSession(session, (session) => {
              session.messages = session.messages.concat();
            });
          });
        }
      }
    }
  };

  const handleInputAudio = async (item: RTInputAudioItem) => {
    await item.waitForCompletion();
    if (item.transcription) {
      const userMessage = createMessage({
        role: "user",
        content: item.transcription,
      });
      chatStore.updateTargetSession(session, (session) => {
        session.messages = session.messages.concat([userMessage]);
      });
      // save input audio_url, and update session
      const { audioStartMillis, audioEndMillis } = item;
      // upload audio get audio_url
      const blob = audioHandlerRef.current?.saveRecordFile(
        audioStartMillis,
        audioEndMillis,
      );
      uploadImage(blob!).then((audio_url) => {
        userMessage.audio_url = audio_url;
        chatStore.updateTargetSession(session, (session) => {
          session.messages = session.messages.concat();
        });
      });
    }
    // stop streaming play after get input audio.
    audioHandlerRef.current?.stopStreamingPlayback();
  };

  const toggleRecording = async () => {
    if (!isRecording && clientRef.current) {
      try {
        if (!audioHandlerRef.current) {
          audioHandlerRef.current = new AudioHandler();
          await audioHandlerRef.current.initialize();
        }
        await audioHandlerRef.current.startRecording(async (chunk) => {
          await clientRef.current?.sendAudio(chunk);
        });
        setIsRecording(true);
      } catch (error) {
        console.error("Failed to start recording:", error);
      }
    } else if (audioHandlerRef.current) {
      try {
        audioHandlerRef.current.stopRecording();
        if (!useVAD) {
          const inputAudio = await clientRef.current?.commitAudio();
          await handleInputAudio(inputAudio!);
          await clientRef.current?.generateResponse();
        }
        setIsRecording(false);
      } catch (error) {
        console.error("Failed to stop recording:", error);
      }
    }
  };

  useEffect(() => {
    // guard against double initialization
    if (initRef.current) return;
    initRef.current = true;

    const initAudioHandler = async () => {
      const handler = new AudioHandler();
      await handler.initialize();
      audioHandlerRef.current = handler;
      await handleConnect();
      await toggleRecording();
    };

    initAudioHandler().catch((error) => {
      setStatus(error);
      console.error(error);
    });

    return () => {
      if (isRecording) {
        toggleRecording();
      }
      audioHandlerRef.current?.close().catch(console.error);
      disconnect();
    };
  }, []);

  useEffect(() => {
    let animationFrameId: number;

    if (isConnected && isRecording) {
      const animationFrame = () => {
        if (audioHandlerRef.current) {
          const freqData = audioHandlerRef.current.getByteFrequencyData();
          setFrequencies(freqData);
        }
        animationFrameId = requestAnimationFrame(animationFrame);
      };

      animationFrameId = requestAnimationFrame(animationFrame);
    } else {
      setFrequencies(undefined);
    }

    return () => {
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
      }
    };
  }, [isConnected, isRecording]);

  // update session params
  useEffect(() => {
    clientRef.current?.configure({ voice });
  }, [voice]);
  useEffect(() => {
    clientRef.current?.configure({ temperature });
  }, [temperature]);

  const handleClose = async () => {
    onClose?.();
    if (isRecording) {
      await toggleRecording();
    }
    disconnect().catch(console.error);
  };

  return (
    <div className={styles["realtime-chat"]}>
      <div
        className={clsx(styles["circle-mic"], {
          [styles["pulse"]]: isRecording,
        })}
      >
        <VoicePrint frequencies={frequencies} isActive={isRecording} />
      </div>

      <div className={styles["bottom-icons"]}>
        <div>
          <IconButton
            icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
            onClick={toggleRecording}
            disabled={!isConnected}
            shadow
            bordered
          />
        </div>
        <div className={styles["icon-center"]}>{status}</div>
        <div>
          <IconButton
            icon={<PowerIcon />}
            onClick={handleClose}
            shadow
            bordered
          />
        </div>
      </div>
    </div>
  );
}
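
Note: the chat.tsx changes that actually mount <RealtimeChat /> were suppressed above as too large. As a rough sketch of the intended wiring — hypothetical component and state names, plain class names instead of the real SCSS module — the .chat-main / .chat-side-panel rules added to the stylesheet suggest something like:

// Hypothetical wiring sketch — not the suppressed chat.tsx diff itself.
// Assumes plain class names for brevity; the real code uses a SCSS module.
import { useState, type ReactNode } from "react";
import clsx from "clsx";
import { RealtimeChat } from "@/app/components/realtime-chat";

export function ChatMain({ children }: { children: ReactNode }) {
  // toggled e.g. by a headphone button among the chat input actions
  const [showRealtimeChat, setShowRealtimeChat] = useState(false);

  return (
    <div className="chat-main">
      <div className="chat-body-container">{children}</div>
      {/* slides in from the right via the translateX transition */}
      <div
        className={clsx("chat-side-panel", {
          "chat-side-panel-show": showRealtimeChat,
        })}
      >
        {showRealtimeChat && (
          <RealtimeChat onClose={() => setShowRealtimeChat(false)} />
        )}
      </div>
    </div>
  );
}

The panel sits off-screen at translateX(100%) until the -show modifier resets it, which is what makes the realtime view slide over the chat body.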

app/components/realtime-chat/realtime-config.tsx (new file, 173 lines)
@@ -0,0 +1,173 @@
import { RealtimeConfig } from "@/app/store";

import Locale from "@/app/locales";
import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib";

import { InputRange } from "@/app/components/input-range";
import { Voice } from "rt-client";
import { ServiceProvider } from "@/app/constant";

const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure];

const models = ["gpt-4o-realtime-preview-2024-10-01"];

const voice = ["alloy", "shimmer", "echo"];

export function RealtimeConfigList(props: {
  realtimeConfig: RealtimeConfig;
  updateConfig: (updater: (config: RealtimeConfig) => void) => void;
}) {
  const azureConfigComponent = props.realtimeConfig.provider ===
    ServiceProvider.Azure && (
    <>
      <ListItem
        title={Locale.Settings.Realtime.Azure.Endpoint.Title}
        subTitle={Locale.Settings.Realtime.Azure.Endpoint.SubTitle}
      >
        <input
          value={props.realtimeConfig?.azure?.endpoint}
          type="text"
          placeholder={Locale.Settings.Realtime.Azure.Endpoint.Title}
          onChange={(e) => {
            props.updateConfig(
              (config) => (config.azure.endpoint = e.currentTarget.value),
            );
          }}
        />
      </ListItem>
      <ListItem
        title={Locale.Settings.Realtime.Azure.Deployment.Title}
        subTitle={Locale.Settings.Realtime.Azure.Deployment.SubTitle}
      >
        <input
          value={props.realtimeConfig?.azure?.deployment}
          type="text"
          placeholder={Locale.Settings.Realtime.Azure.Deployment.Title}
          onChange={(e) => {
            props.updateConfig(
              (config) => (config.azure.deployment = e.currentTarget.value),
            );
          }}
        />
      </ListItem>
    </>
  );

  return (
    <>
      <ListItem
        title={Locale.Settings.Realtime.Enable.Title}
        subTitle={Locale.Settings.Realtime.Enable.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.realtimeConfig.enable}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.enable = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem>

      {props.realtimeConfig.enable && (
        <>
          <ListItem
            title={Locale.Settings.Realtime.Provider.Title}
            subTitle={Locale.Settings.Realtime.Provider.SubTitle}
          >
            <Select
              aria-label={Locale.Settings.Realtime.Provider.Title}
              value={props.realtimeConfig.provider}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.provider = e.target.value as ServiceProvider),
                );
              }}
            >
              {providers.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.Model.Title}
            subTitle={Locale.Settings.Realtime.Model.SubTitle}
          >
            <Select
              aria-label={Locale.Settings.Realtime.Model.Title}
              value={props.realtimeConfig.model}
              onChange={(e) => {
                props.updateConfig((config) => (config.model = e.target.value));
              }}
            >
              {models.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.ApiKey.Title}
            subTitle={Locale.Settings.Realtime.ApiKey.SubTitle}
          >
            <PasswordInput
              aria={Locale.Settings.ShowPassword}
              aria-label={Locale.Settings.Realtime.ApiKey.Title}
              value={props.realtimeConfig.apiKey}
              type="text"
              placeholder={Locale.Settings.Realtime.ApiKey.Placeholder}
              onChange={(e) => {
                props.updateConfig(
                  (config) => (config.apiKey = e.currentTarget.value),
                );
              }}
            />
          </ListItem>
          {azureConfigComponent}
          <ListItem
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
            <Select
              value={props.realtimeConfig.voice}
              onChange={(e) => {
                props.updateConfig(
                  (config) => (config.voice = e.currentTarget.value as Voice),
                );
              }}
            >
              {voice.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.Realtime.Temperature.Title}
            subTitle={Locale.Settings.Realtime.Temperature.SubTitle}
          >
            <InputRange
              aria={Locale.Settings.Temperature.Title}
              value={props.realtimeConfig?.temperature?.toFixed(1)}
              min="0.6"
              max="1"
              step="0.1"
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.temperature = e.currentTarget.valueAsNumber),
                );
              }}
            ></InputRange>
          </ListItem>
        </>
      )}
    </>
  );
}

@@ -85,6 +85,7 @@ import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
 import { TTSConfigList } from "./tts-config";
+import { RealtimeConfigList } from "./realtime-chat/realtime-config";

 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();

@@ -1799,7 +1800,18 @@ export function Settings() {
       {shouldShowPromptModal && (
         <UserPromptModal onClose={() => setShowPromptModal(false)} />
       )}

+      <List>
+        <RealtimeConfigList
+          realtimeConfig={config.realtimeConfig}
+          updateConfig={(updater) => {
+            const realtimeConfig = { ...config.realtimeConfig };
+            updater(realtimeConfig);
+            config.update(
+              (config) => (config.realtimeConfig = realtimeConfig),
+            );
+          }}
+        />
+      </List>
       <List>
         <TTSConfigList
           ttsConfig={config.ttsConfig}

app/components/voice-print/index.ts (new file, 1 line)
@@ -0,0 +1 @@
export * from "./voice-print";

app/components/voice-print/voice-print.module.scss (new file, 11 lines)
@@ -0,0 +1,11 @@
.voice-print {
  width: 100%;
  height: 60px;
  margin: 20px 0;

  canvas {
    width: 100%;
    height: 100%;
    filter: brightness(1.2); // boost overall brightness
  }
}

app/components/voice-print/voice-print.tsx (new file, 180 lines)
@@ -0,0 +1,180 @@
import { useEffect, useRef, useCallback } from "react";
import styles from "./voice-print.module.scss";

interface VoicePrintProps {
  frequencies?: Uint8Array;
  isActive?: boolean;
}

export function VoicePrint({ frequencies, isActive }: VoicePrintProps) {
  // Canvas ref, used to obtain the drawing context
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Historical frequency data, kept for smoothing
  const historyRef = useRef<number[][]>([]);
  // Number of history frames to retain; controls the smoothness
  const historyLengthRef = useRef(10);
  // Animation frame id, stored for cleanup
  const animationFrameRef = useRef<number>();

  /**
   * Update the frequency history.
   * A FIFO queue keeps the history at a fixed length.
   */
  const updateHistory = useCallback((freqArray: number[]) => {
    historyRef.current.push(freqArray);
    if (historyRef.current.length > historyLengthRef.current) {
      historyRef.current.shift();
    }
  }, []);

  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    /**
     * Handle high-DPI displays:
     * scale the canvas's actual rendering resolution by the device pixel ratio.
     */
    const dpr = window.devicePixelRatio || 1;
    canvas.width = canvas.offsetWidth * dpr;
    canvas.height = canvas.offsetHeight * dpr;
    ctx.scale(dpr, dpr);

    /**
     * Main drawing function.
     * Runs a smooth animation via requestAnimationFrame, in four steps:
     * 1. clear the canvas
     * 2. update the history
     * 3. compute the waveform points
     * 4. draw the vertically mirrored voiceprint
     */
    const draw = () => {
      // clear the canvas
      ctx.clearRect(0, 0, canvas.width, canvas.height);

      if (!frequencies || !isActive) {
        historyRef.current = [];
        return;
      }

      const freqArray = Array.from(frequencies);
      updateHistory(freqArray);

      // draw the voiceprint
      const points: [number, number][] = [];
      const centerY = canvas.height / 2;
      const width = canvas.width;
      const sliceWidth = width / (frequencies.length - 1);

      // draw the main waveform
      ctx.beginPath();
      ctx.moveTo(0, centerY);

      /**
       * Voiceprint drawing algorithm:
       * 1. average against history for smooth transitions
       * 2. add natural movement with a sine function
       * 3. connect points with Bézier curves for a smoother line
       * 4. draw the mirrored half to complete the voiceprint
       */
      for (let i = 0; i < frequencies.length; i++) {
        const x = i * sliceWidth;
        let avgFrequency = frequencies[i];

        /**
         * Waveform smoothing:
         * 1. collect this bin's frequency values across the history
         * 2. average the current value with the historical ones
         * 3. derive the displayed height from that average
         */
        if (historyRef.current.length > 0) {
          const historicalValues = historyRef.current.map((h) => h[i] || 0);
          avgFrequency =
            (avgFrequency + historicalValues.reduce((a, b) => a + b, 0)) /
            (historyRef.current.length + 1);
        }

        /**
         * Waveform shaping:
         * 1. normalize the frequency to the 0–1 range
         * 2. apply a time-dependent sine modulation
         * 3. connect points smoothly with Bézier curves
         */
        const normalized = avgFrequency / 255.0;
        const height = normalized * (canvas.height / 2);
        const y = centerY + height * Math.sin(i * 0.2 + Date.now() * 0.002);

        points.push([x, y]);

        if (i === 0) {
          ctx.moveTo(x, y);
        } else {
          // Bézier curves make the waveform smoother
          const prevPoint = points[i - 1];
          const midX = (prevPoint[0] + x) / 2;
          ctx.quadraticCurveTo(
            prevPoint[0],
            prevPoint[1],
            midX,
            (prevPoint[1] + y) / 2,
          );
        }
      }

      // draw the mirrored lower half
      for (let i = points.length - 1; i >= 0; i--) {
        const [x, y] = points[i];
        const symmetricY = centerY - (y - centerY);
        if (i === points.length - 1) {
          ctx.lineTo(x, symmetricY);
        } else {
          const nextPoint = points[i + 1];
          const midX = (nextPoint[0] + x) / 2;
          ctx.quadraticCurveTo(
            nextPoint[0],
            centerY - (nextPoint[1] - centerY),
            midX,
            centerY - ((nextPoint[1] + y) / 2 - centerY),
          );
        }
      }

      ctx.closePath();

      /**
       * Gradient fill:
       * a three-stop left-to-right gradient with some transparency;
       * the blue palette improves the visual effect.
       */
      const gradient = ctx.createLinearGradient(0, 0, canvas.width, 0);
      gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
      gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
      gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");

      ctx.fillStyle = gradient;
      ctx.fill();

      animationFrameRef.current = requestAnimationFrame(draw);
    };

    // start the animation loop
    draw();

    // cleanup: cancel the animation when the component unmounts
    return () => {
      if (animationFrameRef.current) {
        cancelAnimationFrame(animationFrameRef.current);
      }
    };
  }, [frequencies, isActive, updateHistory]);

  return (
    <div className={styles["voice-print"]}>
      <canvas ref={canvasRef} />
    </div>
  );
}
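
The per-bin smoothing above is easy to test in isolation. A minimal sketch — the pure-function extraction is illustrative; the component does this inline:

// Standalone version of the per-bin smoothing used in draw() above.
function smoothBin(current: number, history: number[][], bin: number): number {
  const past = history.map((frame) => frame[bin] || 0);
  return (current + past.reduce((a, b) => a + b, 0)) / (history.length + 1);
}

// With a 3-frame history of [100, 120, 140] at some bin and a current
// reading of 160, the drawn value is (160 + 100 + 120 + 140) / 4 = 130.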

app/icons/headphone.svg (new file, 11 lines, 808 B)
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="16" height="16" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
  <path d="M4 28C4 26.8954 4.89543 26 6 26H10V38H6C4.89543 38 4 37.1046 4 36V28Z" fill="none" />
  <path d="M38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
    fill="none" />
  <path
    d="M10 36V24C10 16.268 16.268 10 24 10C31.732 10 38 16.268 38 24V36M10 26H6C4.89543 26 4 26.8954 4 28V36C4 37.1046 4.89543 38 6 38H10V26ZM38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
    stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
  <path d="M16 32H20L22 26L26 38L28 32H32" stroke="#333" stroke-width="4" stroke-linecap="round"
    stroke-linejoin="round" />
</svg>

app/icons/power.svg (new file, 7 lines, 675 B)
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
  <path
    d="M14.5 8C13.8406 8.37652 13.2062 8.79103 12.6 9.24051C11.5625 10.0097 10.6074 10.8814 9.75 11.8402C6.79377 15.1463 5 19.4891 5 24.2455C5 34.6033 13.5066 43 24 43C34.4934 43 43 34.6033 43 24.2455C43 19.4891 41.2062 15.1463 38.25 11.8402C37.3926 10.8814 36.4375 10.0097 35.4 9.24051C34.7938 8.79103 34.1594 8.37652 33.5 8"
    stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
  <path d="M24 4V24" stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
</svg>

app/icons/voice-off.svg (new file, 13 lines, 811 B)
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
  <path
    d="M31 24V11C31 7.13401 27.866 4 24 4C20.134 4 17 7.13401 17 11V24C17 27.866 20.134 31 24 31C27.866 31 31 27.866 31 24Z"
    stroke="#d0021b" stroke-width="4" stroke-linejoin="round" />
  <path
    d="M9 23C9 31.2843 15.7157 38 24 38C25.7532 38 27.4361 37.6992 29 37.1465M39 23C39 25.1333 38.5547 27.1626 37.7519 29"
    stroke="#d0021b" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
  <path d="M24 38V44" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
    stroke-linejoin="round" />
  <path d="M42 42L6 6" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
    stroke-linejoin="round" />
</svg>

app/icons/voice.svg (new file, 9 lines, 549 B)
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
  <rect x="17" y="4" width="14" height="27" rx="7" fill="none" stroke="#333" stroke-width="4"
    stroke-linejoin="round" />
  <path d="M9 23C9 31.2843 15.7157 38 24 38C32.2843 38 39 31.2843 39 23" stroke="#333"
    stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
  <path d="M24 38V44" stroke="#333" stroke-width="4" stroke-linecap="round"
    stroke-linejoin="round" />
</svg>

app/lib/audio.ts (new file, 200 lines)
@@ -0,0 +1,200 @@
export class AudioHandler {
  private context: AudioContext;
  private mergeNode: ChannelMergerNode;
  private analyserData: Uint8Array;
  public analyser: AnalyserNode;
  private workletNode: AudioWorkletNode | null = null;
  private stream: MediaStream | null = null;
  private source: MediaStreamAudioSourceNode | null = null;
  private recordBuffer: Int16Array[] = [];
  private readonly sampleRate = 24000;

  private nextPlayTime: number = 0;
  private isPlaying: boolean = false;
  private playbackQueue: AudioBufferSourceNode[] = [];
  private playBuffer: Int16Array[] = [];

  constructor() {
    this.context = new AudioContext({ sampleRate: this.sampleRate });
    // using ChannelMergerNode to get merged audio data, and then get analyser data.
    this.mergeNode = new ChannelMergerNode(this.context, { numberOfInputs: 2 });
    this.analyser = new AnalyserNode(this.context, { fftSize: 256 });
    this.analyserData = new Uint8Array(this.analyser.frequencyBinCount);
    this.mergeNode.connect(this.analyser);
  }

  getByteFrequencyData() {
    this.analyser.getByteFrequencyData(this.analyserData);
    return this.analyserData;
  }

  async initialize() {
    await this.context.audioWorklet.addModule("/audio-processor.js");
  }

  async startRecording(onChunk: (chunk: Uint8Array) => void) {
    try {
      if (!this.workletNode) {
        await this.initialize();
      }

      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          sampleRate: this.sampleRate,
          echoCancellation: true,
          noiseSuppression: true,
        },
      });

      await this.context.resume();
      this.source = this.context.createMediaStreamSource(this.stream);
      this.workletNode = new AudioWorkletNode(
        this.context,
        "audio-recorder-processor",
      );

      this.workletNode.port.onmessage = (event) => {
        if (event.data.eventType === "audio") {
          const float32Data = event.data.audioData;
          const int16Data = new Int16Array(float32Data.length);

          for (let i = 0; i < float32Data.length; i++) {
            const s = Math.max(-1, Math.min(1, float32Data[i]));
            int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
          }

          const uint8Data = new Uint8Array(int16Data.buffer);
          onChunk(uint8Data);
          // save recordBuffer
          // @ts-ignore
          this.recordBuffer.push.apply(this.recordBuffer, int16Data);
        }
      };

      this.source.connect(this.workletNode);
      this.source.connect(this.mergeNode, 0, 0);
      this.workletNode.connect(this.context.destination);

      this.workletNode.port.postMessage({ command: "START_RECORDING" });
    } catch (error) {
      console.error("Error starting recording:", error);
      throw error;
    }
  }

  stopRecording() {
    if (!this.workletNode || !this.source || !this.stream) {
      throw new Error("Recording not started");
    }

    this.workletNode.port.postMessage({ command: "STOP_RECORDING" });

    this.workletNode.disconnect();
    this.source.disconnect();
    this.stream.getTracks().forEach((track) => track.stop());
  }
  startStreamingPlayback() {
    this.isPlaying = true;
    this.nextPlayTime = this.context.currentTime;
  }

  stopStreamingPlayback() {
    this.isPlaying = false;
    this.playbackQueue.forEach((source) => source.stop());
    this.playbackQueue = [];
    this.playBuffer = [];
  }

  playChunk(chunk: Uint8Array) {
    if (!this.isPlaying) return;

    const int16Data = new Int16Array(chunk.buffer);
    // @ts-ignore
    this.playBuffer.push.apply(this.playBuffer, int16Data); // save playBuffer

    const float32Data = new Float32Array(int16Data.length);
    for (let i = 0; i < int16Data.length; i++) {
      float32Data[i] = int16Data[i] / (int16Data[i] < 0 ? 0x8000 : 0x7fff);
    }

    const audioBuffer = this.context.createBuffer(
      1,
      float32Data.length,
      this.sampleRate,
    );
    audioBuffer.getChannelData(0).set(float32Data);

    const source = this.context.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.context.destination);
    source.connect(this.mergeNode, 0, 1);

    const chunkDuration = audioBuffer.length / this.sampleRate;

    source.start(this.nextPlayTime);

    this.playbackQueue.push(source);
    source.onended = () => {
      const index = this.playbackQueue.indexOf(source);
      if (index > -1) {
        this.playbackQueue.splice(index, 1);
      }
    };

    this.nextPlayTime += chunkDuration;

    if (this.nextPlayTime < this.context.currentTime) {
      this.nextPlayTime = this.context.currentTime;
    }
  }
  _saveData(data: Int16Array, bytesPerSample = 16): Blob {
    const headerLength = 44;
    const numberOfChannels = 1;
    const byteLength = data.buffer.byteLength;
    const header = new Uint8Array(headerLength);
    const view = new DataView(header.buffer);
    view.setUint32(0, 1380533830, false); // RIFF identifier 'RIFF'
    view.setUint32(4, 36 + byteLength, true); // file length minus RIFF identifier length and file description length
    view.setUint32(8, 1463899717, false); // RIFF type 'WAVE'
    view.setUint32(12, 1718449184, false); // format chunk identifier 'fmt '
    view.setUint32(16, 16, true); // format chunk length
    view.setUint16(20, 1, true); // sample format (raw)
    view.setUint16(22, numberOfChannels, true); // channel count
    view.setUint32(24, this.sampleRate, true); // sample rate
    view.setUint32(28, this.sampleRate * 4, true); // byte rate (sample rate * block align)
    view.setUint16(32, numberOfChannels * 2, true); // block align (channel count * bytes per sample)
    view.setUint16(34, bytesPerSample, true); // bits per sample
    view.setUint32(36, 1684108385, false); // data chunk identifier 'data'
    view.setUint32(40, byteLength, true); // data chunk length

    // using data.buffer, so no need to setUint16 to view.
    return new Blob([view, data.buffer], { type: "audio/mpeg" });
  }
  savePlayFile() {
    // @ts-ignore
    return this._saveData(new Int16Array(this.playBuffer));
  }
  saveRecordFile(
    audioStartMillis: number | undefined,
    audioEndMillis: number | undefined,
  ) {
    const startIndex = audioStartMillis
      ? Math.floor((audioStartMillis * this.sampleRate) / 1000)
      : 0;
    const endIndex = audioEndMillis
      ? Math.floor((audioEndMillis * this.sampleRate) / 1000)
      : this.recordBuffer.length;
    return this._saveData(
      // @ts-ignore
      new Int16Array(this.recordBuffer.slice(startIndex, endIndex)),
    );
  }
  async close() {
    this.recordBuffer = [];
    this.workletNode?.disconnect();
    this.source?.disconnect();
    this.stream?.getTracks().forEach((track) => track.stop());
    await this.context.close();
  }
}
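
The four big-endian magic numbers written by _saveData are the ASCII chunk tags of a standard 44-byte WAV header. A quick sketch to decode them:

// Decode the big-endian uint32 chunk tags written by _saveData above.
const tag = (n: number) =>
  String.fromCharCode(
    (n >>> 24) & 0xff,
    (n >>> 16) & 0xff,
    (n >>> 8) & 0xff,
    n & 0xff,
  );

console.log(tag(1380533830)); // "RIFF"
console.log(tag(1463899717)); // "WAVE"
console.log(tag(1718449184)); // "fmt "
console.log(tag(1684108385)); // "data"

Two quirks worth noting in the header itself: the byte-rate field at offset 28 is written as sampleRate * 4, although mono 16-bit PCM implies sampleRate * 2 (which would match the block align of 2 written at offset 32), and the Blob is typed audio/mpeg even though the payload is a WAV container. In practice decoders read the chunk structure rather than these fields, so playback still works.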

@@ -562,6 +562,39 @@ const cn = {
         SubTitle: "生成语音的速度",
       },
     },
+    Realtime: {
+      Enable: {
+        Title: "实时聊天",
+        SubTitle: "开启实时聊天功能",
+      },
+      Provider: {
+        Title: "模型服务商",
+        SubTitle: "切换不同的服务商",
+      },
+      Model: {
+        Title: "模型",
+        SubTitle: "选择一个模型",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "接口地址",
+          SubTitle: "接口地址",
+        },
+        Deployment: {
+          Title: "部署名称",
+          SubTitle: "部署名称",
+        },
+      },
+      Temperature: {
+        Title: "随机性 (temperature)",
+        SubTitle: "值越大,回复越随机",
+      },
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",

@@ -570,6 +570,39 @@ const en: LocaleType = {
       },
       Engine: "TTS Engine",
     },
+    Realtime: {
+      Enable: {
+        Title: "Realtime Chat",
+        SubTitle: "Enable realtime chat feature",
+      },
+      Provider: {
+        Title: "Model Provider",
+        SubTitle: "Switch between different providers",
+      },
+      Model: {
+        Title: "Model",
+        SubTitle: "Select a model",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "Endpoint",
+          SubTitle: "Endpoint",
+        },
+        Deployment: {
+          Title: "Deployment Name",
+          SubTitle: "Deployment Name",
+        },
+      },
+      Temperature: {
+        Title: "Randomness (temperature)",
+        SubTitle: "Higher values result in more random responses",
+      },
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",

@@ -52,6 +52,7 @@ export type ChatMessage = RequestMessage & {
   id: string;
   model?: ModelType;
   tools?: ChatMessageTool[];
+  audio_url?: string;
 };

 export function createMessage(override: Partial<ChatMessage>): ChatMessage {
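
How this audio_url gets rendered lives in the suppressed chat.tsx diff; as a hypothetical sketch (names are illustrative), the .chat-message-audio styles added earlier imply markup along these lines:

// Hypothetical rendering sketch for a message's audio_url — the real markup
// is in the suppressed chat.tsx diff; styles come from .chat-message-audio.
function MessageAudio({ audioUrl }: { audioUrl?: string }) {
  if (!audioUrl) return null;
  return (
    <div className="chat-message-audio">
      {/* the stylesheet pins the native player's height to 30px */}
      <audio src={audioUrl} controls />
    </div>
  );
}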

@@ -15,6 +15,7 @@ import {
   ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";
+import type { Voice } from "rt-client";

 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
 export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];

@@ -90,12 +91,26 @@ export const DEFAULT_CONFIG = {
     voice: DEFAULT_TTS_VOICE,
     speed: 1.0,
   },
+
+  realtimeConfig: {
+    enable: false,
+    provider: "OpenAI" as ServiceProvider,
+    model: "gpt-4o-realtime-preview-2024-10-01",
+    apiKey: "",
+    azure: {
+      endpoint: "",
+      deployment: "",
+    },
+    temperature: 0.9,
+    voice: "alloy" as Voice,
+  },
 };

 export type ChatConfig = typeof DEFAULT_CONFIG;

 export type ModelConfig = ChatConfig["modelConfig"];
 export type TTSConfig = ChatConfig["ttsConfig"];
+export type RealtimeConfig = ChatConfig["realtimeConfig"];

 export function limitNumber(
   x: number,

@@ -138,7 +138,7 @@ export function uploadImage(file: Blob): Promise<string> {
     })
     .then((res) => res.json())
     .then((res) => {
-      console.log("res", res);
+      // console.log("res", res);
       if (res?.code == 0 && res?.data) {
         return res?.data;
       }

@@ -52,7 +52,8 @@
     "sass": "^1.59.2",
     "spark-md5": "^3.0.2",
     "use-debounce": "^9.0.4",
-    "zustand": "^4.3.8"
+    "zustand": "^4.3.8",
+    "rt-client": "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz"
   },
   "devDependencies": {
     "@tauri-apps/api": "^1.6.0",

public/audio-processor.js (new file, 48 lines)
@@ -0,0 +1,48 @@
// @ts-nocheck
class AudioRecorderProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.isRecording = false;
    this.bufferSize = 2400; // 100ms at 24kHz
    this.currentBuffer = [];

    this.port.onmessage = (event) => {
      if (event.data.command === "START_RECORDING") {
        this.isRecording = true;
      } else if (event.data.command === "STOP_RECORDING") {
        this.isRecording = false;

        if (this.currentBuffer.length > 0) {
          this.sendBuffer();
        }
      }
    };
  }

  sendBuffer() {
    if (this.currentBuffer.length > 0) {
      const audioData = new Float32Array(this.currentBuffer);
      this.port.postMessage({
        eventType: "audio",
        audioData: audioData,
      });
      this.currentBuffer = [];
    }
  }

  process(inputs) {
    const input = inputs[0];
    if (input.length > 0 && this.isRecording) {
      const audioData = input[0];

      this.currentBuffer.push(...audioData);

      if (this.currentBuffer.length >= this.bufferSize) {
        this.sendBuffer();
      }
    }
    return true;
  }
}

registerProcessor("audio-recorder-processor", AudioRecorderProcessor);
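
For context — standard Web Audio behavior, not something stated in the diff — process() runs once per 128-frame render quantum, so the 2400-sample threshold is crossed on the 19th quantum and each posted chunk is actually 2432 samples, slightly more than the nominal 100 ms:

// Timing sketch, assuming the standard 128-frame render quantum:
const quantum = 128; // frames per process() call
const threshold = 2400; // bufferSize in the processor above
const quantaPerChunk = Math.ceil(threshold / quantum); // 19
const samplesPerChunk = quantaPerChunk * quantum; // 2432 samples
const chunkMillis = (samplesPerChunk / 24000) * 1000; // ≈ 101 ms per chunk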

yarn.lock (10 changed lines)
@@ -7455,6 +7455,12 @@ robust-predicates@^3.0.0:
   resolved "https://registry.npmmirror.com/robust-predicates/-/robust-predicates-3.0.1.tgz#ecde075044f7f30118682bd9fb3f123109577f9a"
   integrity sha512-ndEIpszUHiG4HtDsQLeIuMvRsDnn8c8rYStabochtUeCvfuvNptb5TUbVD68LRAILPX7p9nqQGh4xJgn3EHS/g==

+"rt-client@https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz":
+  version "0.5.0"
+  resolved "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz#abf2e9a850201e3571b8d36830f77bc52af3de9b"
+  dependencies:
+    ws "^8.18.0"
+
 run-parallel@^1.1.9:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"

@@ -8498,9 +8504,9 @@ write-file-atomic@^4.0.2:
   imurmurhash "^0.1.4"
   signal-exit "^3.0.7"

-ws@^8.11.0:
+ws@^8.11.0, ws@^8.18.0:
   version "8.18.0"
-  resolved "https://registry.npmmirror.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
   integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==

 xml-name-validator@^4.0.0: