Merge pull request #5786 from ConnectAI-E/feature/realtime-chat

Feature/realtime chat
This commit is contained in:
Lloyd Zhou 2024-11-11 13:19:26 +08:00 committed by GitHub
commit 289aeec8af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 1937 additions and 634 deletions

View File

@ -96,10 +96,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
- [x] Artifacts: Easily preview, copy and share generated content/webpages through a separate window [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
- [x] Plugins: support network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
- [x] network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
- [x] Supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
- [ ] local knowledge base
## What's New
- 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
- 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
- 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
- 🚀 v2.14.0 Now supports Artifacts & SD
@ -134,10 +135,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
- [x] Artifacts: 通过独立窗口,轻松预览、复制和分享生成的内容/可交互网页 [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
- [x] 插件机制,支持`联网搜索`、`计算器`、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
- [x] 支持联网搜索、计算器、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
- [x] 支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
- [ ] 本地知识库
## 最新动态
- 🚀 v2.15.8 现在支持Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
- 🚀 v2.15.4 客户端支持Tauri本地直接调用大模型API更安全[#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
- 🚀 v2.15.0 现在支持插件功能了!了解更多:[NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
- 🚀 v2.14.0 现在支持 Artifacts & SD 了。

View File

@ -45,6 +45,14 @@
.chat-input-actions {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
gap: 5px;
&-end {
display: flex;
margin-left: auto;
gap: 5px;
}
.chat-input-action {
display: inline-flex;
@ -62,10 +70,6 @@
width: var(--icon-width);
overflow: hidden;
&:not(:last-child) {
margin-right: 5px;
}
.text {
white-space: nowrap;
padding-left: 5px;
@ -231,10 +235,12 @@
animation: slide-in ease 0.3s;
$linear: linear-gradient(to right,
rgba(0, 0, 0, 0),
rgba(0, 0, 0, 1),
rgba(0, 0, 0, 0));
$linear: linear-gradient(
to right,
rgba(0, 0, 0, 0),
rgba(0, 0, 0, 1),
rgba(0, 0, 0, 0)
);
mask-image: $linear;
@mixin show {
@ -373,7 +379,7 @@
}
}
.chat-message-user>.chat-message-container {
.chat-message-user > .chat-message-container {
align-items: flex-end;
}
@ -443,6 +449,25 @@
transition: all ease 0.3s;
}
.chat-message-audio {
display: flex;
align-items: center;
justify-content: space-between;
border-radius: 10px;
background-color: rgba(0, 0, 0, 0.05);
border: var(--border-in-light);
position: relative;
transition: all ease 0.3s;
margin-top: 10px;
font-size: 14px;
user-select: text;
word-break: break-word;
box-sizing: border-box;
audio {
height: 30px; /* 调整高度 */
}
}
.chat-message-item-image {
width: 100%;
margin-top: 10px;
@ -471,23 +496,27 @@
border: rgba($color: #888, $alpha: 0.2) 1px solid;
}
@media only screen and (max-width: 600px) {
$calc-image-width: calc(100vw/3*2/var(--image-count));
$calc-image-width: calc(100vw / 3 * 2 / var(--image-count));
.chat-message-item-image-multi {
width: $calc-image-width;
height: $calc-image-width;
}
.chat-message-item-image {
max-width: calc(100vw/3*2);
max-width: calc(100vw / 3 * 2);
}
}
@media screen and (min-width: 600px) {
$max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count));
$image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count));
$max-image-width: calc(
calc(1200px - var(--sidebar-width)) / 3 * 2 / var(--image-count)
);
$image-width: calc(
calc(var(--window-width) - var(--sidebar-width)) / 3 * 2 /
var(--image-count)
);
.chat-message-item-image-multi {
width: $image-width;
@ -497,7 +526,7 @@
}
.chat-message-item-image {
max-width: calc(calc(1200px - var(--sidebar-width))/3*2);
max-width: calc(calc(1200px - var(--sidebar-width)) / 3 * 2);
}
}
@ -515,7 +544,7 @@
z-index: 1;
}
.chat-message-user>.chat-message-container>.chat-message-item {
.chat-message-user > .chat-message-container > .chat-message-item {
background-color: var(--second);
&:hover {
@ -626,7 +655,8 @@
min-height: 68px;
}
.chat-input:focus {}
.chat-input:focus {
}
.chat-input-send {
background-color: var(--primary);
@ -693,4 +723,31 @@
.shortcut-key span {
font-size: 12px;
color: var(--black);
}
}
.chat-main {
display: flex;
height: 100%;
width: 100%;
position: relative;
overflow: hidden;
.chat-body-container {
height: 100%;
display: flex;
flex-direction: column;
flex: 1;
width: 100%;
}
.chat-side-panel {
position: absolute;
inset: 0;
background: var(--white);
overflow: hidden;
z-index: 10;
transform: translateX(100%);
transition: all ease 0.3s;
&-show {
transform: translateX(0);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
export * from "./realtime-chat";

View File

@ -0,0 +1,74 @@
.realtime-chat {
width: 100%;
justify-content: center;
align-items: center;
position: relative;
display: flex;
flex-direction: column;
height: 100%;
padding: 20px;
box-sizing: border-box;
.circle-mic {
width: 150px;
height: 150px;
border-radius: 50%;
background: linear-gradient(to bottom right, #a0d8ef, #f0f8ff);
display: flex;
justify-content: center;
align-items: center;
}
.icon-center {
font-size: 24px;
}
.bottom-icons {
display: flex;
justify-content: space-between;
align-items: center;
width: 100%;
position: absolute;
bottom: 20px;
box-sizing: border-box;
padding: 0 20px;
}
.icon-left,
.icon-right {
width: 46px;
height: 46px;
font-size: 36px;
background: var(--second);
border-radius: 50%;
padding: 2px;
display: flex;
justify-content: center;
align-items: center;
cursor: pointer;
&:hover {
opacity: 0.8;
}
}
&.mobile {
display: none;
}
}
.pulse {
animation: pulse 1.5s infinite;
}
@keyframes pulse {
0% {
transform: scale(1);
opacity: 0.7;
}
50% {
transform: scale(1.1);
opacity: 1;
}
100% {
transform: scale(1);
opacity: 0.7;
}
}

View File

@ -0,0 +1,359 @@
import VoiceIcon from "@/app/icons/voice.svg";
import VoiceOffIcon from "@/app/icons/voice-off.svg";
import PowerIcon from "@/app/icons/power.svg";
import styles from "./realtime-chat.module.scss";
import clsx from "clsx";
import { useState, useRef, useEffect } from "react";
import { useChatStore, createMessage, useAppConfig } from "@/app/store";
import { IconButton } from "@/app/components/button";
import {
Modality,
RTClient,
RTInputAudioItem,
RTResponse,
TurnDetection,
} from "rt-client";
import { AudioHandler } from "@/app/lib/audio";
import { uploadImage } from "@/app/utils/chat";
import { VoicePrint } from "@/app/components/voice-print";
interface RealtimeChatProps {
onClose?: () => void;
onStartVoice?: () => void;
onPausedVoice?: () => void;
}
export function RealtimeChat({
onClose,
onStartVoice,
onPausedVoice,
}: RealtimeChatProps) {
const chatStore = useChatStore();
const session = chatStore.currentSession();
const config = useAppConfig();
const [status, setStatus] = useState("");
const [isRecording, setIsRecording] = useState(false);
const [isConnected, setIsConnected] = useState(false);
const [isConnecting, setIsConnecting] = useState(false);
const [modality, setModality] = useState("audio");
const [useVAD, setUseVAD] = useState(true);
const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();
const clientRef = useRef<RTClient | null>(null);
const audioHandlerRef = useRef<AudioHandler | null>(null);
const initRef = useRef(false);
const temperature = config.realtimeConfig.temperature;
const apiKey = config.realtimeConfig.apiKey;
const model = config.realtimeConfig.model;
const azure = config.realtimeConfig.provider === "Azure";
const azureEndpoint = config.realtimeConfig.azure.endpoint;
const azureDeployment = config.realtimeConfig.azure.deployment;
const voice = config.realtimeConfig.voice;
const handleConnect = async () => {
if (isConnecting) return;
if (!isConnected) {
try {
setIsConnecting(true);
clientRef.current = azure
? new RTClient(
new URL(azureEndpoint),
{ key: apiKey },
{ deployment: azureDeployment },
)
: new RTClient({ key: apiKey }, { model });
const modalities: Modality[] =
modality === "audio" ? ["text", "audio"] : ["text"];
const turnDetection: TurnDetection = useVAD
? { type: "server_vad" }
: null;
await clientRef.current.configure({
instructions: "",
voice,
input_audio_transcription: { model: "whisper-1" },
turn_detection: turnDetection,
tools: [],
temperature,
modalities,
});
startResponseListener();
setIsConnected(true);
// TODO
// try {
// const recentMessages = chatStore.getMessagesWithMemory();
// for (const message of recentMessages) {
// const { role, content } = message;
// if (typeof content === "string") {
// await clientRef.current.sendItem({
// type: "message",
// role: role as any,
// content: [
// {
// type: (role === "assistant" ? "text" : "input_text") as any,
// text: content as string,
// },
// ],
// });
// }
// }
// // await clientRef.current.generateResponse();
// } catch (error) {
// console.error("Set message failed:", error);
// }
} catch (error) {
console.error("Connection failed:", error);
setStatus("Connection failed");
} finally {
setIsConnecting(false);
}
} else {
await disconnect();
}
};
const disconnect = async () => {
if (clientRef.current) {
try {
await clientRef.current.close();
clientRef.current = null;
setIsConnected(false);
} catch (error) {
console.error("Disconnect failed:", error);
}
}
};
const startResponseListener = async () => {
if (!clientRef.current) return;
try {
for await (const serverEvent of clientRef.current.events()) {
if (serverEvent.type === "response") {
await handleResponse(serverEvent);
} else if (serverEvent.type === "input_audio") {
await handleInputAudio(serverEvent);
}
}
} catch (error) {
if (clientRef.current) {
console.error("Response iteration error:", error);
}
}
};
const handleResponse = async (response: RTResponse) => {
for await (const item of response) {
if (item.type === "message" && item.role === "assistant") {
const botMessage = createMessage({
role: item.role,
content: "",
});
// add bot message first
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat([botMessage]);
});
let hasAudio = false;
for await (const content of item) {
if (content.type === "text") {
for await (const text of content.textChunks()) {
botMessage.content += text;
}
} else if (content.type === "audio") {
const textTask = async () => {
for await (const text of content.transcriptChunks()) {
botMessage.content += text;
}
};
const audioTask = async () => {
audioHandlerRef.current?.startStreamingPlayback();
for await (const audio of content.audioChunks()) {
hasAudio = true;
audioHandlerRef.current?.playChunk(audio);
}
};
await Promise.all([textTask(), audioTask()]);
}
// update message.content
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat();
});
}
if (hasAudio) {
// upload audio get audio_url
const blob = audioHandlerRef.current?.savePlayFile();
uploadImage(blob!).then((audio_url) => {
botMessage.audio_url = audio_url;
// update text and audio_url
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat();
});
});
}
}
}
};
const handleInputAudio = async (item: RTInputAudioItem) => {
await item.waitForCompletion();
if (item.transcription) {
const userMessage = createMessage({
role: "user",
content: item.transcription,
});
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat([userMessage]);
});
// save input audio_url, and update session
const { audioStartMillis, audioEndMillis } = item;
// upload audio get audio_url
const blob = audioHandlerRef.current?.saveRecordFile(
audioStartMillis,
audioEndMillis,
);
uploadImage(blob!).then((audio_url) => {
userMessage.audio_url = audio_url;
chatStore.updateTargetSession(session, (session) => {
session.messages = session.messages.concat();
});
});
}
// stop streaming play after get input audio.
audioHandlerRef.current?.stopStreamingPlayback();
};
const toggleRecording = async () => {
if (!isRecording && clientRef.current) {
try {
if (!audioHandlerRef.current) {
audioHandlerRef.current = new AudioHandler();
await audioHandlerRef.current.initialize();
}
await audioHandlerRef.current.startRecording(async (chunk) => {
await clientRef.current?.sendAudio(chunk);
});
setIsRecording(true);
} catch (error) {
console.error("Failed to start recording:", error);
}
} else if (audioHandlerRef.current) {
try {
audioHandlerRef.current.stopRecording();
if (!useVAD) {
const inputAudio = await clientRef.current?.commitAudio();
await handleInputAudio(inputAudio!);
await clientRef.current?.generateResponse();
}
setIsRecording(false);
} catch (error) {
console.error("Failed to stop recording:", error);
}
}
};
useEffect(() => {
// 防止重复初始化
if (initRef.current) return;
initRef.current = true;
const initAudioHandler = async () => {
const handler = new AudioHandler();
await handler.initialize();
audioHandlerRef.current = handler;
await handleConnect();
await toggleRecording();
};
initAudioHandler().catch((error) => {
setStatus(error);
console.error(error);
});
return () => {
if (isRecording) {
toggleRecording();
}
audioHandlerRef.current?.close().catch(console.error);
disconnect();
};
}, []);
useEffect(() => {
let animationFrameId: number;
if (isConnected && isRecording) {
const animationFrame = () => {
if (audioHandlerRef.current) {
const freqData = audioHandlerRef.current.getByteFrequencyData();
setFrequencies(freqData);
}
animationFrameId = requestAnimationFrame(animationFrame);
};
animationFrameId = requestAnimationFrame(animationFrame);
} else {
setFrequencies(undefined);
}
return () => {
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
}
};
}, [isConnected, isRecording]);
// update session params
useEffect(() => {
clientRef.current?.configure({ voice });
}, [voice]);
useEffect(() => {
clientRef.current?.configure({ temperature });
}, [temperature]);
const handleClose = async () => {
onClose?.();
if (isRecording) {
await toggleRecording();
}
disconnect().catch(console.error);
};
return (
<div className={styles["realtime-chat"]}>
<div
className={clsx(styles["circle-mic"], {
[styles["pulse"]]: isRecording,
})}
>
<VoicePrint frequencies={frequencies} isActive={isRecording} />
</div>
<div className={styles["bottom-icons"]}>
<div>
<IconButton
icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
onClick={toggleRecording}
disabled={!isConnected}
shadow
bordered
/>
</div>
<div className={styles["icon-center"]}>{status}</div>
<div>
<IconButton
icon={<PowerIcon />}
onClick={handleClose}
shadow
bordered
/>
</div>
</div>
</div>
);
}

View File

@ -0,0 +1,173 @@
import { RealtimeConfig } from "@/app/store";
import Locale from "@/app/locales";
import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib";
import { InputRange } from "@/app/components/input-range";
import { Voice } from "rt-client";
import { ServiceProvider } from "@/app/constant";
const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure];
const models = ["gpt-4o-realtime-preview-2024-10-01"];
const voice = ["alloy", "shimmer", "echo"];
export function RealtimeConfigList(props: {
realtimeConfig: RealtimeConfig;
updateConfig: (updater: (config: RealtimeConfig) => void) => void;
}) {
const azureConfigComponent = props.realtimeConfig.provider ===
ServiceProvider.Azure && (
<>
<ListItem
title={Locale.Settings.Realtime.Azure.Endpoint.Title}
subTitle={Locale.Settings.Realtime.Azure.Endpoint.SubTitle}
>
<input
value={props.realtimeConfig?.azure?.endpoint}
type="text"
placeholder={Locale.Settings.Realtime.Azure.Endpoint.Title}
onChange={(e) => {
props.updateConfig(
(config) => (config.azure.endpoint = e.currentTarget.value),
);
}}
/>
</ListItem>
<ListItem
title={Locale.Settings.Realtime.Azure.Deployment.Title}
subTitle={Locale.Settings.Realtime.Azure.Deployment.SubTitle}
>
<input
value={props.realtimeConfig?.azure?.deployment}
type="text"
placeholder={Locale.Settings.Realtime.Azure.Deployment.Title}
onChange={(e) => {
props.updateConfig(
(config) => (config.azure.deployment = e.currentTarget.value),
);
}}
/>
</ListItem>
</>
);
return (
<>
<ListItem
title={Locale.Settings.Realtime.Enable.Title}
subTitle={Locale.Settings.Realtime.Enable.SubTitle}
>
<input
type="checkbox"
checked={props.realtimeConfig.enable}
onChange={(e) =>
props.updateConfig(
(config) => (config.enable = e.currentTarget.checked),
)
}
></input>
</ListItem>
{props.realtimeConfig.enable && (
<>
<ListItem
title={Locale.Settings.Realtime.Provider.Title}
subTitle={Locale.Settings.Realtime.Provider.SubTitle}
>
<Select
aria-label={Locale.Settings.Realtime.Provider.Title}
value={props.realtimeConfig.provider}
onChange={(e) => {
props.updateConfig(
(config) =>
(config.provider = e.target.value as ServiceProvider),
);
}}
>
{providers.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
<ListItem
title={Locale.Settings.Realtime.Model.Title}
subTitle={Locale.Settings.Realtime.Model.SubTitle}
>
<Select
aria-label={Locale.Settings.Realtime.Model.Title}
value={props.realtimeConfig.model}
onChange={(e) => {
props.updateConfig((config) => (config.model = e.target.value));
}}
>
{models.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
<ListItem
title={Locale.Settings.Realtime.ApiKey.Title}
subTitle={Locale.Settings.Realtime.ApiKey.SubTitle}
>
<PasswordInput
aria={Locale.Settings.ShowPassword}
aria-label={Locale.Settings.Realtime.ApiKey.Title}
value={props.realtimeConfig.apiKey}
type="text"
placeholder={Locale.Settings.Realtime.ApiKey.Placeholder}
onChange={(e) => {
props.updateConfig(
(config) => (config.apiKey = e.currentTarget.value),
);
}}
/>
</ListItem>
{azureConfigComponent}
<ListItem
title={Locale.Settings.TTS.Voice.Title}
subTitle={Locale.Settings.TTS.Voice.SubTitle}
>
<Select
value={props.realtimeConfig.voice}
onChange={(e) => {
props.updateConfig(
(config) => (config.voice = e.currentTarget.value as Voice),
);
}}
>
{voice.map((v, i) => (
<option value={v} key={i}>
{v}
</option>
))}
</Select>
</ListItem>
<ListItem
title={Locale.Settings.Realtime.Temperature.Title}
subTitle={Locale.Settings.Realtime.Temperature.SubTitle}
>
<InputRange
aria={Locale.Settings.Temperature.Title}
value={props.realtimeConfig?.temperature?.toFixed(1)}
min="0.6"
max="1"
step="0.1"
onChange={(e) => {
props.updateConfig(
(config) =>
(config.temperature = e.currentTarget.valueAsNumber),
);
}}
></InputRange>
</ListItem>
</>
)}
</>
);
}

View File

@ -85,6 +85,7 @@ import { nanoid } from "nanoid";
import { useMaskStore } from "../store/mask";
import { ProviderType } from "../utils/cloud";
import { TTSConfigList } from "./tts-config";
import { RealtimeConfigList } from "./realtime-chat/realtime-config";
function EditPromptModal(props: { id: string; onClose: () => void }) {
const promptStore = usePromptStore();
@ -1799,7 +1800,18 @@ export function Settings() {
{shouldShowPromptModal && (
<UserPromptModal onClose={() => setShowPromptModal(false)} />
)}
<List>
<RealtimeConfigList
realtimeConfig={config.realtimeConfig}
updateConfig={(updater) => {
const realtimeConfig = { ...config.realtimeConfig };
updater(realtimeConfig);
config.update(
(config) => (config.realtimeConfig = realtimeConfig),
);
}}
/>
</List>
<List>
<TTSConfigList
ttsConfig={config.ttsConfig}

View File

@ -0,0 +1 @@
export * from "./voice-print";

View File

@ -0,0 +1,11 @@
.voice-print {
width: 100%;
height: 60px;
margin: 20px 0;
canvas {
width: 100%;
height: 100%;
filter: brightness(1.2); // 增加整体亮度
}
}

View File

@ -0,0 +1,180 @@
import { useEffect, useRef, useCallback } from "react";
import styles from "./voice-print.module.scss";
interface VoicePrintProps {
frequencies?: Uint8Array;
isActive?: boolean;
}
export function VoicePrint({ frequencies, isActive }: VoicePrintProps) {
// Canvas引用用于获取绘图上下文
const canvasRef = useRef<HTMLCanvasElement>(null);
// 存储历史频率数据,用于平滑处理
const historyRef = useRef<number[][]>([]);
// 控制保留的历史数据帧数,影响平滑度
const historyLengthRef = useRef(10);
// 存储动画帧ID用于清理
const animationFrameRef = useRef<number>();
/**
*
* 使FIFO队列维护固定长度的历史记录
*/
const updateHistory = useCallback((freqArray: number[]) => {
historyRef.current.push(freqArray);
if (historyRef.current.length > historyLengthRef.current) {
historyRef.current.shift();
}
}, []);
useEffect(() => {
const canvas = canvasRef.current;
if (!canvas) return;
const ctx = canvas.getContext("2d");
if (!ctx) return;
/**
* DPI屏幕显示
* canvas实际渲染分辨率
*/
const dpr = window.devicePixelRatio || 1;
canvas.width = canvas.offsetWidth * dpr;
canvas.height = canvas.offsetHeight * dpr;
ctx.scale(dpr, dpr);
/**
*
* 使requestAnimationFrame实现平滑动画
*
* 1.
* 2.
* 3.
* 4.
*/
const draw = () => {
// 清空画布
ctx.clearRect(0, 0, canvas.width, canvas.height);
if (!frequencies || !isActive) {
historyRef.current = [];
return;
}
const freqArray = Array.from(frequencies);
updateHistory(freqArray);
// 绘制声纹
const points: [number, number][] = [];
const centerY = canvas.height / 2;
const width = canvas.width;
const sliceWidth = width / (frequencies.length - 1);
// 绘制主波形
ctx.beginPath();
ctx.moveTo(0, centerY);
/**
*
* 1. 使
* 2.
* 3. 使线使线
* 4.
*/
for (let i = 0; i < frequencies.length; i++) {
const x = i * sliceWidth;
let avgFrequency = frequencies[i];
/**
*
* 1.
* 2.
* 3.
*/
if (historyRef.current.length > 0) {
const historicalValues = historyRef.current.map((h) => h[i] || 0);
avgFrequency =
(avgFrequency + historicalValues.reduce((a, b) => a + b, 0)) /
(historyRef.current.length + 1);
}
/**
*
* 1. 0-1
* 2.
* 3. 使线
*/
const normalized = avgFrequency / 255.0;
const height = normalized * (canvas.height / 2);
const y = centerY + height * Math.sin(i * 0.2 + Date.now() * 0.002);
points.push([x, y]);
if (i === 0) {
ctx.moveTo(x, y);
} else {
// 使用贝塞尔曲线使波形更平滑
const prevPoint = points[i - 1];
const midX = (prevPoint[0] + x) / 2;
ctx.quadraticCurveTo(
prevPoint[0],
prevPoint[1],
midX,
(prevPoint[1] + y) / 2,
);
}
}
// 绘制对称的下半部分
for (let i = points.length - 1; i >= 0; i--) {
const [x, y] = points[i];
const symmetricY = centerY - (y - centerY);
if (i === points.length - 1) {
ctx.lineTo(x, symmetricY);
} else {
const nextPoint = points[i + 1];
const midX = (nextPoint[0] + x) / 2;
ctx.quadraticCurveTo(
nextPoint[0],
centerY - (nextPoint[1] - centerY),
midX,
centerY - ((nextPoint[1] + y) / 2 - centerY),
);
}
}
ctx.closePath();
/**
*
*
* 使
*/
const gradient = ctx.createLinearGradient(0, 0, canvas.width, 0);
gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");
ctx.fillStyle = gradient;
ctx.fill();
animationFrameRef.current = requestAnimationFrame(draw);
};
// 启动动画循环
draw();
// 清理函数:在组件卸载时取消动画
return () => {
if (animationFrameRef.current) {
cancelAnimationFrame(animationFrameRef.current);
}
};
}, [frequencies, isActive, updateHistory]);
return (
<div className={styles["voice-print"]}>
<canvas ref={canvasRef} />
</div>
);
}

11
app/icons/headphone.svg Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="16" height="16" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M4 28C4 26.8954 4.89543 26 6 26H10V38H6C4.89543 38 4 37.1046 4 36V28Z" fill="none" />
<path d="M38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
fill="none" />
<path
d="M10 36V24C10 16.268 16.268 10 24 10C31.732 10 38 16.268 38 24V36M10 26H6C4.89543 26 4 26.8954 4 28V36C4 37.1046 4.89543 38 6 38H10V26ZM38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
<path d="M16 32H20L22 26L26 38L28 32H32" stroke="#333" stroke-width="4" stroke-linecap="round"
stroke-linejoin="round" />
</svg>

After

Width:  |  Height:  |  Size: 808 B

7
app/icons/power.svg Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
<path
d="M14.5 8C13.8406 8.37652 13.2062 8.79103 12.6 9.24051C11.5625 10.0097 10.6074 10.8814 9.75 11.8402C6.79377 15.1463 5 19.4891 5 24.2455C5 34.6033 13.5066 43 24 43C34.4934 43 43 34.6033 43 24.2455C43 19.4891 41.2062 15.1463 38.25 11.8402C37.3926 10.8814 36.4375 10.0097 35.4 9.24051C34.7938 8.79103 34.1594 8.37652 33.5 8"
stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
<path d="M24 4V24" stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
</svg>

After

Width:  |  Height:  |  Size: 675 B

13
app/icons/voice-off.svg Normal file
View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
<path
d="M31 24V11C31 7.13401 27.866 4 24 4C20.134 4 17 7.13401 17 11V24C17 27.866 20.134 31 24 31C27.866 31 31 27.866 31 24Z"
stroke="#d0021b" stroke-width="4" stroke-linejoin="round" />
<path
d="M9 23C9 31.2843 15.7157 38 24 38C25.7532 38 27.4361 37.6992 29 37.1465M39 23C39 25.1333 38.5547 27.1626 37.7519 29"
stroke="#d0021b" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
<path d="M24 38V44" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
stroke-linejoin="round" />
<path d="M42 42L6 6" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
stroke-linejoin="round" />
</svg>

After

Width:  |  Height:  |  Size: 811 B

9
app/icons/voice.svg Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect x="17" y="4" width="14" height="27" rx="7" fill="none" stroke="#333" stroke-width="4"
stroke-linejoin="round" />
<path d="M9 23C9 31.2843 15.7157 38 24 38C32.2843 38 39 31.2843 39 23" stroke="#333"
stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
<path d="M24 38V44" stroke="#333" stroke-width="4" stroke-linecap="round"
stroke-linejoin="round" />
</svg>

After

Width:  |  Height:  |  Size: 549 B

200
app/lib/audio.ts Normal file
View File

@ -0,0 +1,200 @@
export class AudioHandler {
private context: AudioContext;
private mergeNode: ChannelMergerNode;
private analyserData: Uint8Array;
public analyser: AnalyserNode;
private workletNode: AudioWorkletNode | null = null;
private stream: MediaStream | null = null;
private source: MediaStreamAudioSourceNode | null = null;
private recordBuffer: Int16Array[] = [];
private readonly sampleRate = 24000;
private nextPlayTime: number = 0;
private isPlaying: boolean = false;
private playbackQueue: AudioBufferSourceNode[] = [];
private playBuffer: Int16Array[] = [];
constructor() {
this.context = new AudioContext({ sampleRate: this.sampleRate });
// using ChannelMergerNode to get merged audio data, and then get analyser data.
this.mergeNode = new ChannelMergerNode(this.context, { numberOfInputs: 2 });
this.analyser = new AnalyserNode(this.context, { fftSize: 256 });
this.analyserData = new Uint8Array(this.analyser.frequencyBinCount);
this.mergeNode.connect(this.analyser);
}
getByteFrequencyData() {
this.analyser.getByteFrequencyData(this.analyserData);
return this.analyserData;
}
async initialize() {
await this.context.audioWorklet.addModule("/audio-processor.js");
}
async startRecording(onChunk: (chunk: Uint8Array) => void) {
try {
if (!this.workletNode) {
await this.initialize();
}
this.stream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
sampleRate: this.sampleRate,
echoCancellation: true,
noiseSuppression: true,
},
});
await this.context.resume();
this.source = this.context.createMediaStreamSource(this.stream);
this.workletNode = new AudioWorkletNode(
this.context,
"audio-recorder-processor",
);
this.workletNode.port.onmessage = (event) => {
if (event.data.eventType === "audio") {
const float32Data = event.data.audioData;
const int16Data = new Int16Array(float32Data.length);
for (let i = 0; i < float32Data.length; i++) {
const s = Math.max(-1, Math.min(1, float32Data[i]));
int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
}
const uint8Data = new Uint8Array(int16Data.buffer);
onChunk(uint8Data);
// save recordBuffer
// @ts-ignore
this.recordBuffer.push.apply(this.recordBuffer, int16Data);
}
};
this.source.connect(this.workletNode);
this.source.connect(this.mergeNode, 0, 0);
this.workletNode.connect(this.context.destination);
this.workletNode.port.postMessage({ command: "START_RECORDING" });
} catch (error) {
console.error("Error starting recording:", error);
throw error;
}
}
stopRecording() {
if (!this.workletNode || !this.source || !this.stream) {
throw new Error("Recording not started");
}
this.workletNode.port.postMessage({ command: "STOP_RECORDING" });
this.workletNode.disconnect();
this.source.disconnect();
this.stream.getTracks().forEach((track) => track.stop());
}
startStreamingPlayback() {
this.isPlaying = true;
this.nextPlayTime = this.context.currentTime;
}
stopStreamingPlayback() {
this.isPlaying = false;
this.playbackQueue.forEach((source) => source.stop());
this.playbackQueue = [];
this.playBuffer = [];
}
playChunk(chunk: Uint8Array) {
if (!this.isPlaying) return;
const int16Data = new Int16Array(chunk.buffer);
// @ts-ignore
this.playBuffer.push.apply(this.playBuffer, int16Data); // save playBuffer
const float32Data = new Float32Array(int16Data.length);
for (let i = 0; i < int16Data.length; i++) {
float32Data[i] = int16Data[i] / (int16Data[i] < 0 ? 0x8000 : 0x7fff);
}
const audioBuffer = this.context.createBuffer(
1,
float32Data.length,
this.sampleRate,
);
audioBuffer.getChannelData(0).set(float32Data);
const source = this.context.createBufferSource();
source.buffer = audioBuffer;
source.connect(this.context.destination);
source.connect(this.mergeNode, 0, 1);
const chunkDuration = audioBuffer.length / this.sampleRate;
source.start(this.nextPlayTime);
this.playbackQueue.push(source);
source.onended = () => {
const index = this.playbackQueue.indexOf(source);
if (index > -1) {
this.playbackQueue.splice(index, 1);
}
};
this.nextPlayTime += chunkDuration;
if (this.nextPlayTime < this.context.currentTime) {
this.nextPlayTime = this.context.currentTime;
}
}
_saveData(data: Int16Array, bytesPerSample = 16): Blob {
const headerLength = 44;
const numberOfChannels = 1;
const byteLength = data.buffer.byteLength;
const header = new Uint8Array(headerLength);
const view = new DataView(header.buffer);
view.setUint32(0, 1380533830, false); // RIFF identifier 'RIFF'
view.setUint32(4, 36 + byteLength, true); // file length minus RIFF identifier length and file description length
view.setUint32(8, 1463899717, false); // RIFF type 'WAVE'
view.setUint32(12, 1718449184, false); // format chunk identifier 'fmt '
view.setUint32(16, 16, true); // format chunk length
view.setUint16(20, 1, true); // sample format (raw)
view.setUint16(22, numberOfChannels, true); // channel count
view.setUint32(24, this.sampleRate, true); // sample rate
view.setUint32(28, this.sampleRate * 4, true); // byte rate (sample rate * block align)
view.setUint16(32, numberOfChannels * 2, true); // block align (channel count * bytes per sample)
view.setUint16(34, bytesPerSample, true); // bits per sample
view.setUint32(36, 1684108385, false); // data chunk identifier 'data'
view.setUint32(40, byteLength, true); // data chunk length
// using data.buffer, so no need to setUint16 to view.
return new Blob([view, data.buffer], { type: "audio/mpeg" });
}
savePlayFile() {
// @ts-ignore
return this._saveData(new Int16Array(this.playBuffer));
}
saveRecordFile(
audioStartMillis: number | undefined,
audioEndMillis: number | undefined,
) {
const startIndex = audioStartMillis
? Math.floor((audioStartMillis * this.sampleRate) / 1000)
: 0;
const endIndex = audioEndMillis
? Math.floor((audioEndMillis * this.sampleRate) / 1000)
: this.recordBuffer.length;
return this._saveData(
// @ts-ignore
new Int16Array(this.recordBuffer.slice(startIndex, endIndex)),
);
}
async close() {
this.recordBuffer = [];
this.workletNode?.disconnect();
this.source?.disconnect();
this.stream?.getTracks().forEach((track) => track.stop());
await this.context.close();
}
}

View File

@ -562,6 +562,39 @@ const cn = {
SubTitle: "生成语音的速度",
},
},
Realtime: {
Enable: {
Title: "实时聊天",
SubTitle: "开启实时聊天功能",
},
Provider: {
Title: "模型服务商",
SubTitle: "切换不同的服务商",
},
Model: {
Title: "模型",
SubTitle: "选择一个模型",
},
ApiKey: {
Title: "API Key",
SubTitle: "API Key",
Placeholder: "API Key",
},
Azure: {
Endpoint: {
Title: "接口地址",
SubTitle: "接口地址",
},
Deployment: {
Title: "部署名称",
SubTitle: "部署名称",
},
},
Temperature: {
Title: "随机性 (temperature)",
SubTitle: "值越大,回复越随机",
},
},
},
Store: {
DefaultTopic: "新的聊天",

View File

@ -570,6 +570,39 @@ const en: LocaleType = {
},
Engine: "TTS Engine",
},
Realtime: {
Enable: {
Title: "Realtime Chat",
SubTitle: "Enable realtime chat feature",
},
Provider: {
Title: "Model Provider",
SubTitle: "Switch between different providers",
},
Model: {
Title: "Model",
SubTitle: "Select a model",
},
ApiKey: {
Title: "API Key",
SubTitle: "API Key",
Placeholder: "API Key",
},
Azure: {
Endpoint: {
Title: "Endpoint",
SubTitle: "Endpoint",
},
Deployment: {
Title: "Deployment Name",
SubTitle: "Deployment Name",
},
},
Temperature: {
Title: "Randomness (temperature)",
SubTitle: "Higher values result in more random responses",
},
},
},
Store: {
DefaultTopic: "New Conversation",

View File

@ -52,6 +52,7 @@ export type ChatMessage = RequestMessage & {
id: string;
model?: ModelType;
tools?: ChatMessageTool[];
audio_url?: string;
};
export function createMessage(override: Partial<ChatMessage>): ChatMessage {

View File

@ -15,6 +15,7 @@ import {
ServiceProvider,
} from "../constant";
import { createPersistStore } from "../utils/store";
import type { Voice } from "rt-client";
export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
@ -90,12 +91,26 @@ export const DEFAULT_CONFIG = {
voice: DEFAULT_TTS_VOICE,
speed: 1.0,
},
realtimeConfig: {
enable: false,
provider: "OpenAI" as ServiceProvider,
model: "gpt-4o-realtime-preview-2024-10-01",
apiKey: "",
azure: {
endpoint: "",
deployment: "",
},
temperature: 0.9,
voice: "alloy" as Voice,
},
};
export type ChatConfig = typeof DEFAULT_CONFIG;
export type ModelConfig = ChatConfig["modelConfig"];
export type TTSConfig = ChatConfig["ttsConfig"];
export type RealtimeConfig = ChatConfig["realtimeConfig"];
export function limitNumber(
x: number,

View File

@ -138,7 +138,7 @@ export function uploadImage(file: Blob): Promise<string> {
})
.then((res) => res.json())
.then((res) => {
console.log("res", res);
// console.log("res", res);
if (res?.code == 0 && res?.data) {
return res?.data;
}

View File

@ -52,7 +52,8 @@
"sass": "^1.59.2",
"spark-md5": "^3.0.2",
"use-debounce": "^9.0.4",
"zustand": "^4.3.8"
"zustand": "^4.3.8",
"rt-client": "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz"
},
"devDependencies": {
"@tauri-apps/api": "^1.6.0",

48
public/audio-processor.js Normal file
View File

@ -0,0 +1,48 @@
// @ts-nocheck
class AudioRecorderProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.isRecording = false;
this.bufferSize = 2400; // 100ms at 24kHz
this.currentBuffer = [];
this.port.onmessage = (event) => {
if (event.data.command === "START_RECORDING") {
this.isRecording = true;
} else if (event.data.command === "STOP_RECORDING") {
this.isRecording = false;
if (this.currentBuffer.length > 0) {
this.sendBuffer();
}
}
};
}
sendBuffer() {
if (this.currentBuffer.length > 0) {
const audioData = new Float32Array(this.currentBuffer);
this.port.postMessage({
eventType: "audio",
audioData: audioData,
});
this.currentBuffer = [];
}
}
process(inputs) {
const input = inputs[0];
if (input.length > 0 && this.isRecording) {
const audioData = input[0];
this.currentBuffer.push(...audioData);
if (this.currentBuffer.length >= this.bufferSize) {
this.sendBuffer();
}
}
return true;
}
}
registerProcessor("audio-recorder-processor", AudioRecorderProcessor);

View File

@ -7455,6 +7455,12 @@ robust-predicates@^3.0.0:
resolved "https://registry.npmmirror.com/robust-predicates/-/robust-predicates-3.0.1.tgz#ecde075044f7f30118682bd9fb3f123109577f9a"
integrity sha512-ndEIpszUHiG4HtDsQLeIuMvRsDnn8c8rYStabochtUeCvfuvNptb5TUbVD68LRAILPX7p9nqQGh4xJgn3EHS/g==
"rt-client@https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz":
version "0.5.0"
resolved "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz#abf2e9a850201e3571b8d36830f77bc52af3de9b"
dependencies:
ws "^8.18.0"
run-parallel@^1.1.9:
version "1.2.0"
resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
@ -8498,9 +8504,9 @@ write-file-atomic@^4.0.2:
imurmurhash "^0.1.4"
signal-exit "^3.0.7"
ws@^8.11.0:
ws@^8.11.0, ws@^8.18.0:
version "8.18.0"
resolved "https://registry.npmmirror.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==
xml-name-validator@^4.0.0: