I've recently been working on a mobile React project that needed text-to-speech built on iFlytek's (讯飞) API. After some discussion, the work landed on us on the front end, so I went and studied iFlytek's web documentation: 语音合成(流式版)WebAPI 文档 on the iFlytek Open Platform docs site.
When I first downloaded the demo and ran it in a browser, the results seemed fine, so I built on that approach directly. It wasn't until we deployed to the server that I remembered a fatal problem on iOS: iOS devices and browsers do not allow audio files to play automatically. So when you run the iFlytek demo on an iPhone, you'll find that after the text is synthesized, the audio doesn't play (possibly a problem with my own usage) and needs another manual tap to start. Our requirement was "tap the text and it plays immediately", so this was a disaster.
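For context, the restriction itself is easy to see in isolation. A quick sketch, not part of the iFlytek demo, of what happens on iOS Safari when you create an AudioContext outside a user gesture:

```typescript
// Hypothetical illustration: on iOS Safari, an AudioContext created
// programmatically starts in the "suspended" state, and resume()
// only succeeds when called from inside a user-gesture handler.
const ctx = new (window.AudioContext || (window as any).webkitAudioContext)();
console.log(ctx.state); // "suspended" on iOS when created outside a gesture

document.addEventListener('click', () => {
  // Inside a user gesture, the context can be resumed (unlocked)
  ctx.resume().then(() => console.log(ctx.state)); // "running"
}, { once: true });
```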
I went back to study the demo code, only to find that the key piece, AudioPlayer (index.umd.js), ships minified. With no other option, I filed a support ticket and got the unminified version from their engineers. If you're interested, here it is:
```typescript
import { geWAV, getPCM } from "./download";

type IStatus = "uninit" | "init" | "play" | "pause" | "stop";
type ISaveAudioData = "pcm" | "wav";

class AudioPlayer {
  constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
  }
  private toSampleRate: number = 22050;
  private resumePlayDuration: number = 1000;
  private fromSampleRate: number = 16000;
  private isAudioDataEnded: boolean = false;
  private playAudioTime?: any;
  private status: IStatus = "uninit";
  private audioContext?: AudioContext;
  private bufferSource?: AudioBufferSourceNode;
  private audioDatas: Float32Array[] = [];
  private pcmAudioDatas: Int16Array[] = [];
  private audioDataOffset: number = 0;
  private processor: Worker;

  postMessage({
    type,
    data,
    isLastData,
  }: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean;
  }) {
    if (this.status === "uninit") {
      return;
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
  }

  private onPlay?: () => void;
  private onStop?: (audioDatas: Float32Array[]) => void;

  private playAudio() {
    clearTimeout(this.playAudioTime);
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
        audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
        if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
          } else {
            this.playAudioTime = setTimeout(() => {
              this.playAudio();
            }, this.resumePlayDuration);
          }
        }
        return;
      }
      let audioBuffer = this.audioContext.createBuffer(
        1,
        audioDataLen,
        this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
        this.audioDataOffset += 1;
        if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
          offset += audioData.length;
        } else {
          for (let i = 0; i < audioData.length; i++) {
            channelData[i] = audioData[i];
          }
        }
        audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = (event) => {
        if (this.status !== "play") {
          return;
        }
        if (this.audioDatas.length) {
          this.playAudio();
        } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
        } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio();
          }, this.resumePlayDuration);
        }
      };
    }
  }

  reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
  }

  start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000,
  }: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
  } = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration;
    let fromSampleRate = sampleRate;
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
        fromSampleRate,
        toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
        this.play();
      }
    };
  }

  play() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext ||
        (window as any).webkitAudioContext)();
      this.audioContext.resume();
    }
    if (this.audioContext) {
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
  }

  stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
  }

  getAudioDataBlob(type: ISaveAudioData) {
    if (!this.pcmAudioDatas?.length) {
      return undefined;
    }
    if (type === "wav") {
      return geWAV(this.pcmAudioDatas, this.fromSampleRate, 16);
    }
    return getPCM(this.pcmAudioDatas);
  }
}

export default AudioPlayer;
```
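The class above delegates decoding and resampling to processor.worker.js, which the demo also ships minified. Its message protocol is visible from the class code: the main thread sends `{type: "init", data: {fromSampleRate, toSampleRate}}` followed by data frames such as `{type: "base64", data}`, and the worker posts back `{audioData, pcmAudioData}`. Here is a minimal sketch of what such a worker could look like, with naive linear resampling; this is my reconstruction of the protocol, not iFlytek's actual implementation, and it only handles the "base64" message type:

```typescript
// processor.worker.js (sketch) -- decodes base64 PCM frames and resamples them
let fromSampleRate = 16000;
let toSampleRate = 22050;

// Decode a base64 string into 16-bit PCM samples
function base64ToInt16(base64: string): Int16Array {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Int16Array(bytes.buffer);
}

// Naive linear resample from fromSampleRate to toSampleRate, normalized to [-1, 1]
function resample(pcm: Int16Array): Float32Array {
  const ratio = toSampleRate / fromSampleRate;
  const out = new Float32Array(Math.floor(pcm.length * ratio));
  for (let i = 0; i < out.length; i++) {
    const src = Math.min(Math.floor(i / ratio), pcm.length - 1);
    out[i] = pcm[src] / 32768;
  }
  return out;
}

self.onmessage = (event: MessageEvent) => {
  const { type, data } = event.data;
  if (type === "init") {
    fromSampleRate = data.fromSampleRate;
    toSampleRate = data.toSampleRate;
    return;
  }
  if (type === "base64") {
    const pcmAudioData = base64ToInt16(data);
    const audioData = resample(pcmAudioData);
    // Shape matches what AudioPlayer.start() reads in processor.onmessage
    (self as any).postMessage({ audioData, pcmAudioData });
  }
};
```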
Next comes modifying this code for our needs. I'm not great at walking through code and flow in prose, so I'll just describe the changes in terms of our project's requirements. First, when playing audio, check the AudioContext's state (on iOS, a context created for autoplay sits in the suspended state):
```typescript
if (this.audioContext.state === 'suspended') {
  this.audioContext.resume().then(() => {
    console.log('AudioContext resumed during play');
  }).catch(err => {
    console.error('Failed to resume AudioContext:', err);
  });
}
```
This makes sure the AudioContext is in the running state before the playback routine is invoked. One caveat worth knowing: resume() returns a Promise, and on iOS it generally only fulfills when the call is tied to a user gesture, which is exactly why the unlock step further down exists. We also added pause and resume for our own requirements (a small wiring sketch follows the snippet):
```typescript
// Pause playback
pause() {
  if (this.audioContext && this.status === "play") {
    this.audioContext.suspend();
    this.status = "pause";
    this.onPause?.();
  }
}
// Resume playback
resume() {
  if (this.audioContext && this.status === "pause") {
    this.audioContext.resume();
    this.status = "play";
    this.onResume?.();
    this.playAudio();
  }
}
```
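Hooking these up to a play/pause button can look like this. This is a sketch under my own names: the import path and the `paused` flag are assumptions, since the class keeps `status` private.

```typescript
import AudioPlayer from './AudioPlayer'; // hypothetical path

// Track the paused state ourselves, since `status` is private on the class
let paused = false;

function onToggleClick(player: AudioPlayer) {
  if (paused) {
    player.resume(); // resumes the suspended AudioContext and re-enters playAudio()
  } else {
    player.pause();  // suspends the AudioContext; buffered data stays put
  }
  paused = !paused;
}
```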
This next part is the crux of the fix (the complete code and its call site follow below): unlock the AudioContext from an interaction event such as click or touchstart, to get around iOS browsers keeping the AudioContext suspended by default.
```typescript
// Unlock the AudioContext from a user gesture
unlockAudioContext() {
  if (!this.audioContext) {
    this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
  }
  if (this.audioContext.state === 'suspended') {
    const unlock = () => {
      this.audioContext?.resume().then(() => {
        this.isAudioContextUnlocked = true; // mark as unlocked
        console.log('AudioContext unlocked');
        document.removeEventListener('click', unlock); // remove listeners once unlocked
        document.removeEventListener('touchstart', unlock);
      }).catch(err => {
        console.error('Failed to unlock AudioContext:', err);
      });
    };
    document.addEventListener('click', unlock);
    document.addEventListener('touchstart', unlock);
  } else {
    this.isAudioContextUnlocked = true; // already unlocked
  }
}
```
So the complete AudioPlayer.ts ends up as:
```typescript
type IStatus = "uninit" | "init" | "play" | "pause" | "stop";

// Module-level reference to whichever player is currently playing
let currentPlayer: AudioPlayer | null = null;

class AudioPlayer {
  constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
  }
  private toSampleRate: number = 22050;
  private resumePlayDuration: number = 1000;
  private fromSampleRate: number = 16000;
  private isAudioDataEnded: boolean = false;
  private playAudioTime?: any;
  private status: IStatus = "uninit";
  private audioContext?: AudioContext;
  private bufferSource?: AudioBufferSourceNode;
  private audioDatas: Float32Array[] = [];
  private pcmAudioDatas: Int16Array[] = [];
  private audioDataOffset: number = 0;
  private processor: Worker;
  private isAudioContextUnlocked: boolean = false; // whether the AudioContext has been unlocked

  private onPlay?: () => void;
  private onPause?: () => void; // pause callback
  private onResume?: () => void; // resume callback
  private onStop?: (audioDatas: Float32Array[]) => void;
  private onPlaybackComplete?: () => void; // playback-complete callback

  // Feed audio data into the player
  postMessage({
    type,
    data,
    isLastData,
  }: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean;
  }) {
    if (this.status === "uninit") {
      return;
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
  }

  // Play buffered audio
  private playAudio() {
    clearTimeout(this.playAudioTime);
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
        audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
        if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
            // Playback has finished: fire the completion callback
            this.onPlaybackComplete?.();
          } else {
            this.playAudioTime = setTimeout(() => {
              this.playAudio();
            }, this.resumePlayDuration);
          }
        }
        return;
      }
      let audioBuffer = this.audioContext.createBuffer(
        1,
        audioDataLen,
        this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
        this.audioDataOffset += 1;
        if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
          offset += audioData.length;
        } else {
          for (let i = 0; i < audioData.length; i++) {
            channelData[i] = audioData[i];
          }
        }
        audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = () => {
        if (this.status !== "play") {
          return;
        }
        if (this.audioDatas.length) {
          this.playAudio();
        } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
        } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio();
          }, this.resumePlayDuration);
        }
      };
    }
  }

  // Register the playback-complete callback
  setPlaybackCompleteCallback(callback: () => void) {
    this.onPlaybackComplete = callback;
  }

  // Reset the player
  reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
  }

  // Initialize the player and set parameters
  start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000,
  }: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
  } = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration;
    let fromSampleRate = sampleRate;
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
        fromSampleRate,
        toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
        this.play();
      }
    };
  }

  // Unlock the AudioContext from a user gesture
  unlockAudioContext() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      const unlock = () => {
        this.audioContext?.resume().then(() => {
          this.isAudioContextUnlocked = true; // mark as unlocked
          console.log('AudioContext unlocked');
          document.removeEventListener('click', unlock); // remove listeners once unlocked
          document.removeEventListener('touchstart', unlock);
        }).catch(err => {
          console.error('Failed to unlock AudioContext:', err);
        });
      };
      document.addEventListener('click', unlock);
      document.addEventListener('touchstart', unlock);
    } else {
      this.isAudioContextUnlocked = true; // already unlocked
    }
  }

  // Stop the current audio so a new one can play
  pauseCurrentAudio() {
    if (currentPlayer) {
      currentPlayer.stop(); // fully stop the current audio
      currentPlayer = null; // clear the current player instance
    }
  }

  // Play audio
  play() {
    if (currentPlayer) {
      currentPlayer.stop(); // stop whatever is currently playing
    }
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().then(() => {
        console.log('AudioContext resumed during play');
      }).catch(err => {
        console.error('Failed to resume AudioContext:', err);
      });
    }
    if (this.audioContext) {
      currentPlayer = this;
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
  }

  // Pause playback
  pause() {
    if (this.audioContext && this.status === "play") {
      this.audioContext.suspend();
      this.status = "pause";
      this.onPause?.();
    }
  }

  // Resume playback
  resume() {
    if (this.audioContext && this.status === "pause") {
      this.audioContext.resume();
      this.status = "play";
      this.onResume?.();
      this.playAudio();
    }
  }

  // Stop playback
  stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
  }
}

export default AudioPlayer;
```
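One design note on the above: currentPlayer lives at module level, so it is shared by every AudioPlayer instance; play() stops whatever was playing before registering itself, which is what keeps two syntheses from talking over each other. Also note that pauseCurrentAudio(), despite its name, fully stops and clears the current player rather than pausing it.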
That's it for AudioPlayer. Below is the text-to-speech hook I wrapped around it. You'll need to swap in your own appId, apiKey, and apiSecret, and be sure to copy processor.worker.js from the demo into your project:
```typescript
import { useState, useRef, useCallback, useEffect } from 'react';
import CryptoJS from 'crypto-js';
import { Base64 } from 'js-base64';
import AudioPlayer from '../utils/audio.umd';

const Status = {
  UNDEFINED: 'UNDEFINED',
  CONNECTING: 'CONNECTING',
  STOP: 'STOP',
};

interface UseTextToSpeechOptions {
  onPlaybackComplete: () => void;
}

const appId = '';
const apiKey = '';
const apiSecret = '';

// Build the signed WebSocket URL per iFlytek's HMAC-SHA256 auth scheme
const getWebSocketUrl = (): string => {
  const host = 'tts-api.xfyun.cn';
  // The demo used toGMTString(), which is deprecated and missing from the
  // TypeScript lib types; toUTCString() produces the same format
  const date = new Date().toUTCString();
  const algorithm = 'hmac-sha256';
  const headers = 'host date request-line';
  const signatureOrigin = `host: ${host}\ndate: ${date}\nGET /v2/tts HTTP/1.1`;
  const signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret);
  const signature = CryptoJS.enc.Base64.stringify(signatureSha);
  const authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`;
  const authorization = btoa(authorizationOrigin);
  return `wss://tts-api.xfyun.cn/v2/tts?authorization=${authorization}&date=${date}&host=${host}`;
};

const encodeText = (text: string): string => {
  return Base64.encode(text);
};

const useTextToSpeech = ({ onPlaybackComplete }: UseTextToSpeechOptions) => {
  const [error, setError] = useState<string | null>(null);
  const [status, setStatus] = useState(Status.UNDEFINED);
  const audioPlayerRef = useRef<any>(null);
  const socketRef = useRef<WebSocket | null>(null);

  if (!audioPlayerRef.current) {
    audioPlayerRef.current = new AudioPlayer("");
  }

  // Register the playback-complete callback
  useEffect(() => {
    if (audioPlayerRef.current) {
      audioPlayerRef.current.setPlaybackCompleteCallback(() => {
        onPlaybackComplete();
      });
    }
  }, [onPlaybackComplete]);

  const changeStatus = (newStatus: string) => {
    setStatus(newStatus);
  };

  const startSynthesis = useCallback(async (text: string) => {
    if (!audioPlayerRef.current) {
      audioPlayerRef.current = new AudioPlayer("");
    }

    // Unlock the AudioContext before opening the socket
    audioPlayerRef.current.unlockAudioContext();

    const ws = new WebSocket(getWebSocketUrl());
    socketRef.current = ws;

    ws.onopen = () => {
      audioPlayerRef.current.start({
        autoPlay: true,
        sampleRate: 16000,
        resumePlayDuration: 1000,
      });
      changeStatus(Status.CONNECTING);
      const params = {
        common: {
          app_id: appId,
        },
        business: { aue: 'raw', auf: 'audio/L16;rate=16000', vcn: 'x4_doudou', tte: 'UTF8' },
        data: {
          status: 2,
          text: encodeText(text),
        },
      };
      ws.send(JSON.stringify(params));
    };

    ws.onmessage = async (event: MessageEvent) => {
      const response = JSON.parse(event.data);
      if (response.code !== 0) {
        setError(`Synthesis error: ${response.code}`);
        changeStatus(Status.UNDEFINED);
        ws.close();
        return;
      }
      // Forward each audio frame to the player; status === 2 marks the last frame
      audioPlayerRef.current?.postMessage({
        type: "base64",
        data: response.data.audio,
        isLastData: response.data.status === 2,
      });
      if (response.code === 0 && response.data.status === 2) {
        ws.close();
      }
    };

    ws.onerror = (err) => {
      console.error('WebSocket error:', err);
      setError('WebSocket error');
      changeStatus(Status.UNDEFINED);
    };

    ws.onclose = () => {
      console.log('WebSocket closed');
      changeStatus(Status.STOP);
    };
  }, []);

  const stopSynthesis = useCallback(() => {
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
      console.log('WebSocket connection closed manually');
      changeStatus(Status.STOP);
    }
    if (audioPlayerRef.current) {
      audioPlayerRef.current.stop();
      audioPlayerRef.current = null;
    }
  }, []);

  // Clean up on unmount
  useEffect(() => {
    return () => {
      stopSynthesis();
    };
  }, [stopSynthesis]);

  return { startSynthesis, stopSynthesis, status, error };
};

export default useTextToSpeech;
```
Every call to startSynthesis begins with audioPlayerRef.current.unlockAudioContext(), which makes sure the audio context can initialize normally in the browser environment rather than being blocked by the autoplay policy.
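To close the loop, here's roughly how the hook gets used from a component. This is a sketch under assumed names (the component, the handler, and the import path are mine): because startSynthesis runs inside the click handler, the unlock listeners fire on that same gesture, which is what lets playback start without a second tap.

```tsx
import React from 'react';
import useTextToSpeech from './hooks/useTextToSpeech'; // hypothetical path

const SpeakableText: React.FC<{ text: string }> = ({ text }) => {
  const { startSynthesis, stopSynthesis, status, error } = useTextToSpeech({
    onPlaybackComplete: () => console.log('playback finished'),
  });

  // Tapping the text kicks off synthesis inside the user gesture,
  // so the AudioContext can be unlocked on iOS
  return (
    <div>
      <p onClick={() => startSynthesis(text)}>{text}</p>
      <button onClick={stopSynthesis}>Stop</button>
      {status === 'CONNECTING' && <span>Synthesizing…</span>}
      {error && <span>{error}</span>}
    </div>
  );
};

export default SpeakableText;
```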
Plenty of the logic here could surely be handled better, and this is my first write-up, so if you spot problems, let's discuss.