diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 243caef61..5cc7f07e7 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -147,7 +147,7 @@ export class ChatGPTApi implements LLMApi {
       method: "POST",
       body: formData,
       signal: controller.signal,
-      headers: getHeaders(),
+      headers: getHeaders(true),
     };
 
     // make a fetch request
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index 0321f9d7f..98fb06077 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -899,7 +899,7 @@ function _Chat() {
     });
     // eslint-disable-next-line react-hooks/exhaustive-deps
     setSpeechApi(
-      new WebTranscriptionApi((transcription) =>
+      new OpenAITranscriptionApi((transcription) =>
        onRecognitionEnd(transcription),
      ),
    );
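Two call-site changes above: the transcription upload now builds its headers with getHeaders(true), presumably so the preset JSON content-type is omitted for a FormData body, and the chat screen swaps WebTranscriptionApi for OpenAITranscriptionApi. A minimal sketch of the request shape this enables, assuming OpenAI's /v1/audio/transcriptions endpoint; transcribe() and apiKey are illustrative names, not identifiers from this repo:

// A minimal sketch, assuming OpenAI's audio transcription endpoint.
async function transcribe(audioBlob: Blob, apiKey: string): Promise<string> {
  const formData = new FormData();
  formData.append("file", audioBlob, "speech.webm");
  formData.append("model", "whisper-1");

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    // No Content-Type here: the browser must set the multipart boundary
    // itself. A preset "application/json" header (what the headers without
    // the flag would carry) is exactly what breaks FormData uploads.
    headers: { Authorization: `Bearer ${apiKey}` },
    body: formData,
  });
  const json = await res.json();
  return json.text; // the endpoint responds with { text: "..." }
}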
diff --git a/app/utils/speech.ts b/app/utils/speech.ts
index 3defc5f38..0c74fc0d9 100644
--- a/app/utils/speech.ts
+++ b/app/utils/speech.ts
@@ -13,35 +13,12 @@ export abstract class SpeechApi {
   onTranscriptionReceived(callback: TranscriptionCallback) {
     this.onTranscription = callback;
   }
-
-  protected async getMediaStream(): Promise<MediaStream | null> {
-    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
-      return await navigator.mediaDevices.getUserMedia({ audio: true });
-    } else if (navigator.getUserMedia) {
-      return new Promise((resolve, reject) => {
-        navigator.getUserMedia({ audio: true }, resolve, reject);
-      });
-    } else {
-      console.warn("The current browser does not support getUserMedia");
-      return null;
-    }
-  }
-
-  protected createRecorder(stream: MediaStream): MediaRecorder | null {
-    if (MediaRecorder.isTypeSupported("audio/webm")) {
-      return new MediaRecorder(stream, { mimeType: "audio/webm" });
-    } else if (MediaRecorder.isTypeSupported("audio/ogg")) {
-      return new MediaRecorder(stream, { mimeType: "audio/ogg" });
-    } else {
-      console.warn("The current browser does not support MediaRecorder");
-      return null;
-    }
-  }
 }
 
 export class OpenAITranscriptionApi extends SpeechApi {
   private listeningStatus = false;
-  private recorder: MediaRecorder | null = null;
+  private mediaRecorder: MediaRecorder | null = null;
+  private stream: MediaStream | null = null;
   private audioChunks: Blob[] = [];
 
   isListening = () => this.listeningStatus;
@@ -54,35 +31,44 @@ export class OpenAITranscriptionApi extends SpeechApi {
   }
 
   async start(): Promise<void> {
-    const stream = await this.getMediaStream();
-    if (!stream) {
-      console.error("Unable to get audio stream");
-      return;
-    }
+    // @ts-ignore
+    navigator.getUserMedia =
+      navigator.getUserMedia ||
+      navigator.webkitGetUserMedia ||
+      navigator.mozGetUserMedia ||
+      navigator.msGetUserMedia;
+    if (navigator.mediaDevices) {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      this.mediaRecorder = new MediaRecorder(stream);
+      this.mediaRecorder.ondataavailable = (e) => {
+        if (e.data && e.data.size > 0) {
+          this.audioChunks.push(e.data);
+        }
+      };
 
-    this.recorder = this.createRecorder(stream);
-    if (!this.recorder) {
-      console.error("Unable to create MediaRecorder");
+      this.stream = stream;
+    } else {
+      console.warn("Media Devices will work only with SSL");
       return;
     }
 
     this.audioChunks = [];
 
-    this.recorder.addEventListener("dataavailable", (event) => {
-      this.audioChunks.push(event.data);
-    });
+    // this.recorder.addEventListener("dataavailable", (event) => {
+    //   this.audioChunks.push(event.data);
+    // });
 
-    this.recorder.start();
+    this.mediaRecorder.start();
     this.listeningStatus = true;
   }
 
   async stop(): Promise<void> {
-    if (!this.recorder || !this.listeningStatus) {
+    if (!this.mediaRecorder || !this.listeningStatus) {
       return;
     }
 
     return new Promise((resolve) => {
-      this.recorder!.addEventListener("stop", async () => {
+      this.mediaRecorder!.addEventListener("stop", async () => {
         const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" });
         const llm = new ChatGPTApi();
         const transcription = await llm.transcription({ file: audioBlob });
@@ -91,7 +77,7 @@ export class OpenAITranscriptionApi extends SpeechApi {
         resolve();
       });
 
-      this.recorder!.stop();
+      this.mediaRecorder!.stop();
     });
   }
 }
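Putting the pieces together, a hypothetical caller of the reworked recorder lifecycle; the constructor callback, start(), and stop() come from the diff above, while the surrounding wiring is illustrative:

// Illustrative only: the wiring mirrors the chat.tsx hunk above.
const speech = new OpenAITranscriptionApi((text) => {
  console.log("transcription:", text); // fired after stop() uploads the audio
});

await speech.start(); // requests the microphone and begins buffering chunks
// ... user speaks ...
await speech.stop();  // flushes chunks, sends the blob to llm.transcription()

One design note: the new stream field presumably exists so the microphone can later be released via stream.getTracks().forEach((t) => t.stop()), which the diff does not yet do, and the blob is labeled audio/wav even though MediaRecorder typically produces webm or ogg output.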