圆咕噜咕噜 posted on 2024-12-30 22:34:03

Pitfalls of iFlytek Text-to-Speech on iOS

I've recently been working on a mobile React project that needed to be built on iFlytek's text-to-speech API. After some discussion, we decided the frontend team would handle it ourselves, so I went off to study iFlytek's web documentation: Speech Synthesis (Streaming) WebAPI Documentation | iFlytek Open Platform Documentation Center.
When I first downloaded the demo and ran it in a desktop browser, the results seemed fine, so I went ahead and built on that approach. Only after deploying to the server did I remember a fatal problem: iOS devices and their browsers do not allow audio to play automatically. Run the iFlytek demo on an iPhone and you'll find that after the text is synthesized, the audio doesn't play (possibly a problem with my own approach?) and another manual tap is needed to trigger it. Our requirement was that tapping the text plays it immediately. The sky was falling.
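To make the restriction concrete, here is a minimal sketch of my own (not from the demo), assuming it runs in a browser page: an AudioContext created outside a user gesture starts out suspended on iOS, and only a handler running inside a real gesture may resume it.

// Minimal illustration of the iOS autoplay restriction (not demo code).
// An AudioContext created outside a user gesture starts out "suspended".
const ctx = new (window.AudioContext || (window as any).webkitAudioContext)();
console.log(ctx.state); // on iOS Safari this logs "suspended"

// Only a handler invoked by a real user gesture may resume it:
document.addEventListener('click', () => {
  ctx.resume().then(() => console.log(ctx.state)); // "running"
}, { once: true });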
I went back over the demo code, only to find that the crucial AudioPlayer (index.umd.js) ships as minified code. With no other option I filed a support ticket and got the unminified version from iFlytek's engineers. If you're interested, feel free to study it; the code is as follows:
import {geWAV, getPCM} from "./download"
type IStatus = "uninit" | "init" | "play" | "pause" | "stop";
type ISaveAudioData = "pcm" | "wav";


class AudioPlayer {
constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
}
private toSampleRate: number = 22050;
private resumePlayDuration: number = 1000;
private fromSampleRate: number = 16000;
private isAudioDataEnded: boolean = false;
private playAudioTime?: any
private status: IStatus = "uninit";
private audioContext?: AudioContext;
private bufferSource?: AudioBufferSourceNode;
private audioDatas: Float32Array[] = [];
private pcmAudioDatas: Int16Array[] = [];
private audioDataOffset: number = 0;
private processor: Worker;
postMessage({
    type,
    data,
    isLastData
}: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean
}) {
    if (this.status === "uninit") {
      return
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
}
private onPlay?: () => void;
private onStop?: (audioDatas: Float32Array[]) => void;
private playAudio() {
    clearTimeout(this.playAudioTime)
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
      audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
      if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
          } else {
            this.playAudioTime = setTimeout(() => {
            this.playAudio()
            }, this.resumePlayDuration)
          }
      }
      return;
      }
      let audioBuffer = this.audioContext.createBuffer(
      1,
      audioDataLen,
      this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
      this.audioDataOffset += 1;
      if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
          offset += audioData.length;
      } else {
          for (let i = 0; i < audioData.length; i++) {
            channelData[offset + i] = audioData[i];
          }
          offset += audioData.length;
      }
      audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = (event) => {
      if (this.status !== "play") {
          return;
      }
      if (this.audioDatas.length) {
          this.playAudio();
      } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
      } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio()
          }, this.resumePlayDuration)
      }
      };
    }
}
reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime)
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
}
start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000
}: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
} = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration
    let fromSampleRate = sampleRate;
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
      fromSampleRate,
      toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
      this.play();
      }
    };
}
play() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext ||
      (window as any).webkitAudioContext)();
      this.audioContext.resume();
    }
    if (this.audioContext) {
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
}
stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime)
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
}
getAudioDataBlob(type: ISaveAudioData) {
    if (!this.pcmAudioDatas?.length) {
      return undefined
    }
    if (type === "wav") {
      return geWAV(this.pcmAudioDatas, this.fromSampleRate, 16)
    }
    return getPCM(this.pcmAudioDatas)
}
}

export default AudioPlayer;
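As an aside, if the Web Audio calls in playAudio() look unfamiliar, here is a standalone sketch of my own (not part of the demo) showing the same chunk-stitching idea: the PCM chunks arrive as Float32Arrays, get copied into a single AudioBuffer, and are played through a buffer source.

// Standalone sketch of the chunk-stitching idea used in playAudio() above.
// Copies several Float32Array chunks into one mono AudioBuffer and plays it.
function playChunks(ctx: AudioContext, chunks: Float32Array[], sampleRate = 22050): AudioBufferSourceNode {
  const total = chunks.reduce((n, c) => n + c.length, 0);
  const buffer = ctx.createBuffer(1, total, sampleRate); // 1 channel (mono)
  let offset = 0;
  for (const chunk of chunks) {
    buffer.copyToChannel(chunk, 0, offset); // write each chunk at its sample offset
    offset += chunk.length;
  }
  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);
  source.start();
  return source; // callers can hook source.onended, as AudioPlayer does
}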
Next came the second round of development on this code. I'm not great at explaining code and flow in prose, so I'll simply walk through the changes driven by our project's requirements. When playing audio, first check the AudioContext's state (on iOS, an AudioContext for auto-played audio sits in the suspended state):
if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().then(() => {
      console.log('AudioContext resumed during play');
      }).catch(err => {
      console.error('Failed to resume AudioContext:', err);
      });
    }

This makes sure the AudioContext is actually running before playback is invoked. We also added pause and resume support to match our requirements:
// Pause playback
pause() {
    if (this.audioContext && this.status === "play") {
      this.audioContext.suspend();
      this.status = "pause";
      this.onPause?.();
    }
}

// Resume playback
resume() {
    if (this.audioContext && this.status === "pause") {
      console.log(this.status)
      this.audioContext.resume();
      this.status = "play";
      this.onResume?.();
      this.playAudio();
    }
}

And this next part is the heart of the fix (the complete code and how to call it follow below): unlock the AudioContext from a user-interaction event such as click or touchstart, to work around iOS browsers keeping the AudioContext suspended by default.
// Unlock the AudioContext
unlockAudioContext() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      const unlock = () => {
      this.audioContext?.resume().then(() => {
          this.isAudioContextUnlocked = true; // mark as unlocked
          console.log('AudioContext unlocked');
          document.removeEventListener('click', unlock); // remove listeners once unlocked
          document.removeEventListener('touchstart', unlock);
      }).catch(err => {
          console.error('Failed to unlock AudioContext:', err);
      });
      };
      document.addEventListener('click', unlock);
      document.addEventListener('touchstart', unlock);
    } else {
      this.isAudioContextUnlocked = true; // already unlocked
    }
}

So the complete AudioPlayer.ts looks like this:
type IStatus = "uninit" | "init" | "play" | "pause" | "stop";

let currentPlayer: AudioPlayer | null = null;

class AudioPlayer {
constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
}
private toSampleRate: number = 22050;
private resumePlayDuration: number = 1000;
private fromSampleRate: number = 16000;
private isAudioDataEnded: boolean = false;
private playAudioTime?: any;
private status: IStatus = "uninit";
private audioContext?: AudioContext;
private bufferSource?: AudioBufferSourceNode;
private audioDatas: Float32Array[] = [];
private pcmAudioDatas: Int16Array[] = [];
private audioDataOffset: number = 0;
private processor: Worker;
private isAudioContextUnlocked: boolean = false; // whether the AudioContext has been unlocked
private onPlay?: () => void;
private onPause?: () => void; // pause callback
private onResume?: () => void; // resume callback
private onStop?: (audioDatas: Float32Array[]) => void;
private onPlaybackComplete?: () => void; // playback-complete callback

// Feed audio data to the worker
postMessage({
    type,
    data,
    isLastData
}: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean;
}) {
    if (this.status === "uninit") {
      return;
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
}

// Play audio
private playAudio() {
    clearTimeout(this.playAudioTime);
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
        audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
        if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
            // playback has finished; fire the completion callback
            this.onPlaybackComplete?.();
          } else {
            this.playAudioTime = setTimeout(() => {
              this.playAudio();
            }, this.resumePlayDuration);
          }
        }
        return;
      }
      let audioBuffer = this.audioContext.createBuffer(
        1,
        audioDataLen,
        this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
        this.audioDataOffset += 1;
        if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
          offset += audioData.length;
        } else {
          for (let i = 0; i < audioData.length; i++) {
            channelData[offset + i] = audioData[i];
          }
          offset += audioData.length;
        }
        audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = () => {
        if (this.status !== "play") {
          return;
        }
        if (this.audioDatas.length) {
          this.playAudio();
        } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
        } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio();
          }, this.resumePlayDuration);
        }
      };
    }
}

// Register the playback-complete callback
setPlaybackCompleteCallback(callback: () => void) {
    this.onPlaybackComplete = callback;
}

// Reset the player
reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
}

// Initialize the player and set parameters
start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000,
}: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
} = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration;
    let fromSampleRate = sampleRate;
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
        fromSampleRate,
        toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
        this.play();
      }
    };
}

// Unlock the AudioContext
unlockAudioContext() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      const unlock = () => {
        this.audioContext?.resume().then(() => {
          this.isAudioContextUnlocked = true; // mark as unlocked
          console.log('AudioContext unlocked');
          document.removeEventListener('click', unlock); // remove listeners once unlocked
          document.removeEventListener('touchstart', unlock);
        }).catch(err => {
          console.error('Failed to unlock AudioContext:', err);
        });
      };
      document.addEventListener('click', unlock);
      document.addEventListener('touchstart', unlock);
    } else {
      this.isAudioContextUnlocked = true; // already unlocked
    }
}

// Stop whichever player is currently active
pauseCurrentAudio() {
    if (currentPlayer) {
      currentPlayer.stop(); // fully stop the current audio
      currentPlayer = null; // clear the current player instance
    }
}

// Play audio
play() {
    if (currentPlayer) {
      currentPlayer.stop(); // stop the currently playing audio
    }
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().then(() => {
        console.log('AudioContext resumed during play');
      }).catch(err => {
        console.error('Failed to resume AudioContext:', err);
      });
    }
    if (this.audioContext) {
      currentPlayer = this;
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
}

// Pause playback
pause() {
    if (this.audioContext && this.status === "play") {
      this.audioContext.suspend();
      this.status = "pause";
      this.onPause?.();
    }
}

// Resume playback
resume() {
    if (this.audioContext && this.status === "pause") {
      console.log(this.status)
      this.audioContext.resume();
      this.status = "play";
      this.onResume?.();
      this.playAudio();
    }
}

// Stop playback
stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
}
}

export default AudioPlayer;

That covers AudioPlayer. Below is the text-to-speech hook I wrapped around it. You need to replace appId, apiKey, and apiSecret with your own values, and be sure to copy processor.worker.js from the demo into your project:
import { useState, useRef, useCallback, useEffect } from 'react';
import CryptoJS from 'crypto-js';
import { Base64 } from 'js-base64';
import AudioPlayer from '../utils/audio.umd';

const Status = {
UNDEFINED: 'UNDEFINED',
CONNECTING: 'CONNECTING',
STOP: 'STOP',
};

interface UseTextToSpeechOptions {
onPlaybackComplete: () => void;
}

const appId = '';
const apiKey = '';
const apiSecret = '';

const getWebSocketUrl = (): string => {
const host = 'tts-api.xfyun.cn';
const date = new Date().toUTCString(); // toUTCString() replaces the deprecated toGMTString(); the format is identical
const algorithm = 'hmac-sha256';
const headers = 'host date request-line';
const signatureOrigin = `host: ${host}\ndate: ${date}\nGET /v2/tts HTTP/1.1`;
const signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret);
const signature = CryptoJS.enc.Base64.stringify(signatureSha);
const authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`;
const authorization = btoa(authorizationOrigin);
return `wss://tts-api.xfyun.cn/v2/tts?authorization=${authorization}&date=${date}&host=${host}`;
};

const encodeText = (text: string): string => {
return Base64.encode(text);
};

const useTextToSpeech = ({ onPlaybackComplete }: UseTextToSpeechOptions) => {
const [error, setError] = useState<string | null>(null);
const [status, setStatus] = useState(Status.UNDEFINED);
const audioPlayerRef = useRef<any>(null);
const socketRef = useRef<WebSocket | null>(null);

if (!audioPlayerRef.current) {
    audioPlayerRef.current = new AudioPlayer("");
}

// 设置音频播放完成回调
useEffect(() => {
    if (audioPlayerRef.current) {
      console.log("调用结束");
      audioPlayerRef.current.setPlaybackCompleteCallback(() => {
      onPlaybackComplete();
      });
    }
}, [onPlaybackComplete]);

const changeStatus = (newStatus: string) => {
    setStatus(newStatus);
};

const startSynthesis = useCallback(async (text: string) => {
    if (!audioPlayerRef.current) {
      audioPlayerRef.current = new AudioPlayer("");
    }

    audioPlayerRef.current.unlockAudioContext();

    const ws = new WebSocket(getWebSocketUrl());
    socketRef.current = ws;

    ws.onopen = (e) => {
      audioPlayerRef.current.start({
      autoPlay: true,
      sampleRate: 16000,
      resumePlayDuration: 1000
      });
      changeStatus(Status.CONNECTING);
      var params = {
      common: {
          app_id: appId,
      },
      business: { aue: 'raw', auf: 'audio/L16;rate=16000', vcn: 'x4_doudou', tte: 'UTF8' },
      data: {
          status: 2,
          text: encodeText(text),
      },
      };
      ws.send(JSON.stringify(params));
    };

    ws.onmessage = async (event: MessageEvent) => {
      const response = JSON.parse(event.data);
      if (response.code !== 0) {
      setError(`合成错误: ${response.code}`);
      changeStatus(Status.UNDEFINED);
      ws.close();
      return;
      }

      audioPlayerRef.current?.postMessage({
      type: "base64",
      data: response.data.audio,
      isLastData: response.data.status === 2,
      });

      if (response.code === 0 && response.data.status === 2) {
      ws.close();
      }
    };

    ws.onerror = (err) => {
      console.error('WebSocket 发生错误:', err);
      setError('WebSocket 错误');
      changeStatus(Status.UNDEFINED);
    };

    ws.onclose = () => {
      console.log('WebSocket 已关闭');
      changeStatus(Status.STOP);
    };
}, []);

const stopSynthesis = useCallback(() => {
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
      console.log('WebSocket 连接已手动关闭');
      changeStatus(Status.STOP);
    }

    if (audioPlayerRef.current) {
      audioPlayerRef.current.stop();
      audioPlayerRef.current = null;
    }
}, []);

useEffect(() => {
    return () => {
      stopSynthesis();
    };
}, [stopSynthesis]);

return { startSynthesis, stopSynthesis, status, error };
};

export default useTextToSpeech;
Every time you call the text-to-speech method through startSynthesis, it first runs audioPlayerRef.current.unlockAudioContext() to make sure the audio context can initialize normally in the browser (and isn't blocked by the autoplay/mute policy).
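For completeness, here is a hypothetical way to wire the hook into a component (the component name and import path are my own assumptions, not from the project). Because startSynthesis runs inside the click handler, the unlock listener registered by unlockAudioContext() fires within the same user gesture:

// Hypothetical usage (component name and import path are assumptions).
// startSynthesis runs inside the click handler, so the AudioContext is
// unlocked within the same user gesture that starts playback.
import React from 'react';
import useTextToSpeech from '../hooks/useTextToSpeech';

const SpeakButton: React.FC<{ text: string }> = ({ text }) => {
  const { startSynthesis, stopSynthesis, status, error } = useTextToSpeech({
    onPlaybackComplete: () => console.log('Playback finished'),
  });

  return (
    <div>
      <button onClick={() => startSynthesis(text)}>Play</button>
      <button onClick={stopSynthesis}>Stop</button>
      <p>Status: {status}{error ? ` / ${error}` : ''}</p>
    </div>
  );
};

export default SpeakButton;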
There's plenty of logic here that could probably be handled better, and this is my first time writing up a share like this, so if you spot problems, let's discuss.
