Pitfalls of iFlytek text-to-speech (TTS) on iOS


Recently I was working on a mobile React project that needed iFlytek's text-to-speech API. After some discussion we decided the frontend would handle it ourselves, so I went off to study iFlytek's web docs: Speech Synthesis (Streaming) WebAPI | iFlytek Open Platform documentation center.
When I downloaded the demo and ran it in a desktop browser, the results looked fine, so I went ahead and built on that approach. Only when deploying to the server did I remember a fatal problem: iOS devices and their browsers do not allow audio to autoplay. Run the iFlytek demo on an iPhone and you'll find that after the text is synthesized, nothing plays (possibly something in my own setup) until you manually tap once more. Our requirement was tap-the-text-and-it-plays immediately. The sky was falling.
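A quick way to see the policy for yourself (a minimal sketch of my own, not part of the iFlytek demo): create an AudioContext before any user gesture and check its state. On iOS Safari it typically reports "suspended", and only a resume() issued inside a gesture handler moves it to "running".

// Minimal probe of the autoplay policy (assumes a browser environment).
const probeCtx = new (window.AudioContext || (window as any).webkitAudioContext)();
console.log(probeCtx.state); // on iOS Safari: usually "suspended" before any gesture

document.addEventListener('click', () => {
  // resume() inside a user gesture is what actually "unlocks" audio output
  probeCtx.resume().then(() => console.log(probeCtx.state)); // "running"
}, { once: true });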
I went back over the demo code, only to find that the key piece, AudioPlayer (index.umd.js), ships minified. So I filed a support ticket and got the unminified version from their engineers. If you're interested, here it is:
import { geWAV, getPCM } from "./download";

type IStatus = "uninit" | "init" | "play" | "pause" | "stop";
type ISaveAudioData = "pcm" | "wav";

class AudioPlayer {
  constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
  }
  private toSampleRate: number = 22050;
  private resumePlayDuration: number = 1000;
  private fromSampleRate: number = 16000;
  private isAudioDataEnded: boolean = false;
  private playAudioTime?: any;
  private status: IStatus = "uninit";
  private audioContext?: AudioContext;
  private bufferSource?: AudioBufferSourceNode;
  private audioDatas: Float32Array[] = [];
  private pcmAudioDatas: Int16Array[] = [];
  private audioDataOffset: number = 0;
  private processor: Worker;
  postMessage({
    type,
    data,
    isLastData
  }: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean;
  }) {
    if (this.status === "uninit") {
      return;
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
  }
  private onPlay?: () => void;
  private onStop?: (audioDatas: Float32Array[]) => void;
  private playAudio() {
    clearTimeout(this.playAudioTime);
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
        audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
        if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
          } else {
            this.playAudioTime = setTimeout(() => {
              this.playAudio();
            }, this.resumePlayDuration);
          }
        }
        return;
      }
      let audioBuffer = this.audioContext.createBuffer(
        1,
        audioDataLen,
        this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
        this.audioDataOffset += 1;
        if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
        } else {
          // Fallback for browsers without copyToChannel. Note: the demo wrote
          // every chunk from index 0, overwriting earlier chunks; writing at
          // the running offset fixes that.
          for (let i = 0; i < audioData.length; i++) {
            channelData[offset + i] = audioData[i];
          }
        }
        offset += audioData.length;
        audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = () => {
        if (this.status !== "play") {
          return;
        }
        if (this.audioDatas.length) {
          this.playAudio();
        } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
        } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio();
          }, this.resumePlayDuration);
        }
      };
    }
  }
  reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
  }
  start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000
  }: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
  } = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration;
    let fromSampleRate = sampleRate;
    // Clamp the playback rate into the 22050-96000 Hz range accepted by createBuffer.
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
        fromSampleRate,
        toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
        this.play();
      }
    };
  }
  play() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext ||
        (window as any).webkitAudioContext)();
      this.audioContext.resume();
    }
    if (this.audioContext) {
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
  }
  stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
  }
  getAudioDataBlob(type: ISaveAudioData) {
    if (!this.pcmAudioDatas?.length) {
      return undefined;
    }
    if (type === "wav") {
      return geWAV(this.pcmAudioDatas, this.fromSampleRate, 16);
    }
    return getPCM(this.pcmAudioDatas);
  }
}

export default AudioPlayer;
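Before getting into my changes, here is how this class is driven end to end, as far as I can tell from the demo (a hedged sketch; the '/libs' path and the base64 chunk are placeholders of mine):

// '/libs' must contain processor.worker.js from the demo; the path is an assumption.
const player = new AudioPlayer('/libs');
player.start({ autoPlay: true, sampleRate: 16000, resumePlayDuration: 1000 });

// Feed each audio frame returned by the TTS WebSocket; mark the final one.
// someBase64AudioChunk stands in for response.data.audio.
declare const someBase64AudioChunk: string;
player.postMessage({
  type: 'base64',
  data: someBase64AudioChunk,
  isLastData: false,
});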
Next came the secondary development. I'm not great at walking through code and flow in prose, so I'll just describe the changes against our project's requirements. First, when playing audio, check the state of the AudioContext (on iOS an autoplay-blocked context sits in the suspended state):
if (this.audioContext.state === 'suspended') {
  this.audioContext.resume().then(() => {
    console.log('AudioContext resumed during play');
  }).catch(err => {
    console.error('Failed to resume AudioContext:', err);
  });
}
This makes sure the AudioContext is active before playback is triggered. We also added pause and resume to match our requirements:
// Pause playback
pause() {
  if (this.audioContext && this.status === "play") {
    this.audioContext.suspend();
    this.status = "pause";
    this.onPause?.();
  }
}

// Resume playback
resume() {
  if (this.audioContext && this.status === "pause") {
    console.log(this.status);
    this.audioContext.resume();
    this.status = "play";
    this.onResume?.();
    this.playAudio();
  }
}
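A usage sketch for these two methods, reusing the player instance from the earlier sketch (the #toggle element id is hypothetical):

// Hypothetical wiring: a single button that flips between pause() and resume().
let paused = false;
document.querySelector('#toggle')?.addEventListener('click', () => {
  if (paused) {
    player.resume();
  } else {
    player.pause();
  }
  paused = !paused;
});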
And this is the heart of the fix (the complete code and its usage follow below): unlock the AudioContext from a user interaction event such as click or touchstart, to work around iOS browsers suspending the AudioContext by default.
// Unlock the AudioContext
unlockAudioContext() {
  if (!this.audioContext) {
    this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
  }
  if (this.audioContext.state === 'suspended') {
    const unlock = () => {
      this.audioContext?.resume().then(() => {
        this.isAudioContextUnlocked = true; // mark as unlocked
        console.log('AudioContext unlocked');
        document.removeEventListener('click', unlock); // remove the listeners once unlocked
        document.removeEventListener('touchstart', unlock);
      }).catch(err => {
        console.error('Failed to unlock AudioContext:', err);
      });
    };
    document.addEventListener('click', unlock);
    document.addEventListener('touchstart', unlock);
  } else {
    this.isAudioContextUnlocked = true; // already unlocked
  }
}
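One thing worth noting: the listeners only take effect once a real gesture arrives, so it pays to call unlockAudioContext() as early as possible, for example on component mount, so the user's very first tap anywhere on the page doubles as the unlock. A sketch under that assumption (playerRef is a name of mine; the hook below does this slightly differently, calling it at the start of each synthesis):

// Assumed React wiring: unlock on mount so the first tap anywhere unlocks audio.
useEffect(() => {
  playerRef.current?.unlockAudioContext();
}, []);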
So the complete AudioPlayer.ts looks like this:
type IStatus = "uninit" | "init" | "play" | "pause" | "stop";

let currentPlayer: AudioPlayer | null = null;

class AudioPlayer {
  constructor(processorPath?: string) {
    this.processor = new Worker(`${processorPath}/processor.worker.js`);
  }
  private toSampleRate: number = 22050;
  private resumePlayDuration: number = 1000;
  private fromSampleRate: number = 16000;
  private isAudioDataEnded: boolean = false;
  private playAudioTime?: any;
  private status: IStatus = "uninit";
  private audioContext?: AudioContext;
  private bufferSource?: AudioBufferSourceNode;
  private audioDatas: Float32Array[] = [];
  private pcmAudioDatas: Int16Array[] = [];
  private audioDataOffset: number = 0;
  private processor: Worker;
  private isAudioContextUnlocked: boolean = false; // whether the AudioContext has been unlocked
  private onPlay?: () => void;
  private onPause?: () => void; // pause callback
  private onResume?: () => void; // resume callback
  private onStop?: (audioDatas: Float32Array[]) => void;
  private onPlaybackComplete?: () => void; // playback-complete callback

  // Feed audio data to the worker
  postMessage({
    type,
    data,
    isLastData
  }: {
    type: "base64" | "string" | "Int16Array" | "Float32Array";
    data: string | Int16Array | Float32Array;
    isLastData: boolean;
  }) {
    if (this.status === "uninit") {
      return;
    }
    this.processor.postMessage({
      type,
      data,
    });
    this.isAudioDataEnded = isLastData;
  }

  // Play buffered audio
  private playAudio() {
    clearTimeout(this.playAudioTime);
    if (this.audioContext) {
      let audioDataLen = 0;
      for (let i = this.audioDataOffset; i < this.audioDatas.length; i++) {
        audioDataLen += this.audioDatas[i].length;
      }
      if (!audioDataLen) {
        if (this.status === "play") {
          if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
            this.stop();
            // fire the callback when playback finishes
            this.onPlaybackComplete?.();
          } else {
            this.playAudioTime = setTimeout(() => {
              this.playAudio();
            }, this.resumePlayDuration);
          }
        }
        return;
      }
      let audioBuffer = this.audioContext.createBuffer(
        1,
        audioDataLen,
        this.toSampleRate
      );
      let channelData = audioBuffer.getChannelData(0);
      let audioData = this.audioDatas[this.audioDataOffset];
      let offset = 0;
      while (audioData) {
        this.audioDataOffset += 1;
        if (audioBuffer.copyToChannel) {
          audioBuffer.copyToChannel(audioData, 0, offset);
        } else {
          // fallback for browsers without copyToChannel, writing at the running offset
          for (let i = 0; i < audioData.length; i++) {
            channelData[offset + i] = audioData[i];
          }
        }
        offset += audioData.length;
        audioData = this.audioDatas[this.audioDataOffset];
      }
      let bufferSource = this.audioContext.createBufferSource();
      this.bufferSource = bufferSource;
      bufferSource.buffer = audioBuffer;
      bufferSource.connect(this.audioContext.destination);
      bufferSource.start();
      bufferSource.onended = () => {
        if (this.status !== "play") {
          return;
        }
        if (this.audioDatas.length) {
          this.playAudio();
        } else if (this.isAudioDataEnded || this.resumePlayDuration <= 0) {
          this.stop();
        } else {
          this.playAudioTime = setTimeout(() => {
            this.playAudio();
          }, this.resumePlayDuration);
        }
      };
    }
  }

  // Register the playback-complete callback
  setPlaybackCompleteCallback(callback: () => void) {
    this.onPlaybackComplete = callback;
  }

  // Reset the player
  reset() {
    this.processor.onmessage = null;
    this.audioDataOffset = 0;
    this.audioDatas = [];
    this.pcmAudioDatas = [];
    this.status = "uninit";
    this.isAudioDataEnded = false;
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
    } catch (e) {
      console.log(e);
    }
  }

  // Initialize the player and set parameters
  start({
    autoPlay = true,
    sampleRate = 16000,
    resumePlayDuration = 1000,
  }: {
    autoPlay?: boolean;
    sampleRate?: number;
    resumePlayDuration?: number;
  } = {}) {
    this.reset();
    this.status = "init";
    this.resumePlayDuration = resumePlayDuration;
    let fromSampleRate = sampleRate;
    let toSampleRate = Math.max(fromSampleRate, 22050);
    toSampleRate = Math.min(toSampleRate, 96000);
    this.fromSampleRate = fromSampleRate;
    this.toSampleRate = toSampleRate;
    this.processor.postMessage({
      type: "init",
      data: {
        fromSampleRate,
        toSampleRate,
      },
    });
    this.processor.onmessage = (event) => {
      const { audioData, pcmAudioData } = event.data;
      this.audioDatas.push(audioData);
      this.pcmAudioDatas.push(pcmAudioData);
      if (this.audioDatas.length === 1 && autoPlay && this.status === "init") {
        this.play();
      }
    };
  }

  // Unlock the AudioContext
  unlockAudioContext() {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      const unlock = () => {
        this.audioContext?.resume().then(() => {
          this.isAudioContextUnlocked = true; // mark as unlocked
          console.log('AudioContext unlocked');
          document.removeEventListener('click', unlock); // remove the listeners once unlocked
          document.removeEventListener('touchstart', unlock);
        }).catch(err => {
          console.error('Failed to unlock AudioContext:', err);
        });
      };
      document.addEventListener('click', unlock);
      document.addEventListener('touchstart', unlock);
    } else {
      this.isAudioContextUnlocked = true; // already unlocked
    }
  }

  // Stop whatever is currently playing
  pauseCurrentAudio() {
    if (currentPlayer) {
      currentPlayer.stop(); // fully stop the current audio
      currentPlayer = null; // clear the instance once stopped
    }
  }

  // Play
  play() {
    if (currentPlayer) {
      currentPlayer.stop(); // stop the currently playing instance
    }
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().then(() => {
        console.log('AudioContext resumed during play');
      }).catch(err => {
        console.error('Failed to resume AudioContext:', err);
      });
    }
    if (this.audioContext) {
      currentPlayer = this;
      this.status = "play";
      this.onPlay?.();
      this.playAudio();
    }
  }

  // Pause playback
  pause() {
    if (this.audioContext && this.status === "play") {
      this.audioContext.suspend();
      this.status = "pause";
      this.onPause?.();
    }
  }

  // Resume playback
  resume() {
    if (this.audioContext && this.status === "pause") {
      console.log(this.status);
      this.audioContext.resume();
      this.status = "play";
      this.onResume?.();
      this.playAudio();
    }
  }

  // Stop playback
  stop() {
    this.audioDataOffset = 0;
    this.status = "stop";
    clearTimeout(this.playAudioTime);
    try {
      this.bufferSource?.stop();
      this.onStop?.(this.audioDatas);
    } catch (e) {
      console.log(e);
    }
  }
}

export default AudioPlayer;
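A design note on the module-level currentPlayer variable: it turns playback into a de facto singleton, so two instances can never talk over each other. A tiny sketch of the behavior (the instances and path are placeholders of mine):

const a = new AudioPlayer('/libs');
const b = new AudioPlayer('/libs');
a.play();
b.play(); // play() calls currentPlayer.stop() first, so "a" goes quiet before "b" starts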
That's it for AudioPlayer. Below is the text-to-speech hook I wrapped around it. Replace appId, apiKey, and apiSecret with your own credentials, and make sure to copy processor.worker.js from the demo into your project.
import { useState, useRef, useCallback, useEffect } from 'react';
import CryptoJS from 'crypto-js';
import { Base64 } from 'js-base64';
import AudioPlayer from '../utils/audio.umd';

const Status = {
  UNDEFINED: 'UNDEFINED',
  CONNECTING: 'CONNECTING',
  STOP: 'STOP',
};

interface UseTextToSpeechOptions {
  onPlaybackComplete: () => void;
}

const appId = '';
const apiKey = '';
const apiSecret = '';

const getWebSocketUrl = (): string => {
  const host = 'tts-api.xfyun.cn';
  // toUTCString() replaces the deprecated toGMTString() from the demo; the output is identical
  const date = new Date().toUTCString();
  const algorithm = 'hmac-sha256';
  const headers = 'host date request-line';
  const signatureOrigin = `host: ${host}\ndate: ${date}\nGET /v2/tts HTTP/1.1`;
  const signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret);
  const signature = CryptoJS.enc.Base64.stringify(signatureSha);
  const authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`;
  const authorization = btoa(authorizationOrigin);
  return `wss://tts-api.xfyun.cn/v2/tts?authorization=${authorization}&date=${date}&host=${host}`;
};

const encodeText = (text: string): string => {
  return Base64.encode(text);
};

const useTextToSpeech = ({ onPlaybackComplete }: UseTextToSpeechOptions) => {
  const [error, setError] = useState<string | null>(null);
  const [status, setStatus] = useState(Status.UNDEFINED);
  const audioPlayerRef = useRef<any>(null);
  const socketRef = useRef<WebSocket | null>(null);

  if (!audioPlayerRef.current) {
    audioPlayerRef.current = new AudioPlayer("");
  }

  // Register the playback-complete callback
  useEffect(() => {
    if (audioPlayerRef.current) {
      console.log("registering playback-complete callback");
      audioPlayerRef.current.setPlaybackCompleteCallback(() => {
        onPlaybackComplete();
      });
    }
  }, [onPlaybackComplete]);

  const changeStatus = (newStatus: string) => {
    setStatus(newStatus);
  };

  const startSynthesis = useCallback(async (text: string) => {
    if (!audioPlayerRef.current) {
      audioPlayerRef.current = new AudioPlayer("");
    }

    audioPlayerRef.current.unlockAudioContext();

    const ws = new WebSocket(getWebSocketUrl());
    socketRef.current = ws;

    ws.onopen = () => {
      audioPlayerRef.current.start({
        autoPlay: true,
        sampleRate: 16000,
        resumePlayDuration: 1000
      });
      changeStatus(Status.CONNECTING);
      const params = {
        common: {
          app_id: appId,
        },
        business: { aue: 'raw', auf: 'audio/L16;rate=16000', vcn: 'x4_doudou', tte: 'UTF8' },
        data: {
          status: 2,
          text: encodeText(text),
        },
      };
      ws.send(JSON.stringify(params));
    };

    ws.onmessage = async (event: MessageEvent) => {
      const response = JSON.parse(event.data);
      if (response.code !== 0) {
        setError(`Synthesis error: ${response.code}`);
        changeStatus(Status.UNDEFINED);
        ws.close();
        return;
      }
      audioPlayerRef.current?.postMessage({
        type: "base64",
        data: response.data.audio,
        isLastData: response.data.status === 2,
      });
      if (response.code === 0 && response.data.status === 2) {
        ws.close();
      }
    };

    ws.onerror = (err) => {
      console.error('WebSocket error:', err);
      setError('WebSocket error');
      changeStatus(Status.UNDEFINED);
    };

    ws.onclose = () => {
      console.log('WebSocket closed');
      changeStatus(Status.STOP);
    };
  }, [onPlaybackComplete]);

  const stopSynthesis = useCallback(() => {
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
      console.log('WebSocket connection closed manually');
      changeStatus(Status.STOP);
    }
    if (audioPlayerRef.current) {
      audioPlayerRef.current.stop();
      audioPlayerRef.current = null;
    }
  }, []);

  useEffect(() => {
    return () => {
      stopSynthesis();
    };
  }, [stopSynthesis]);

  return { startSynthesis, stopSynthesis, status, error };
};

export default useTextToSpeech;
Every time startSynthesis is called to convert text to speech, audioPlayerRef.current.unlockAudioContext() runs first, to make sure the audio context can initialize normally in the browser environment (and isn't blocked by the autoplay/mute policy).
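For completeness, a component consuming the hook might look like this (a sketch; the import path, button labels, and the onPlaybackComplete handler are all placeholders of mine):

import React from 'react';
import useTextToSpeech from '../hooks/useTextToSpeech'; // path is an assumption

const Speaker: React.FC<{ text: string }> = ({ text }) => {
  const { startSynthesis, stopSynthesis, status, error } = useTextToSpeech({
    onPlaybackComplete: () => console.log('playback finished'),
  });
  return (
    <div>
      {/* The tap that starts synthesis is also the gesture that unlocks the AudioContext */}
      <button onClick={() => startSynthesis(text)}>Play</button>
      <button onClick={stopSynthesis}>Stop</button>
      {error && <p>{error}</p>}
      <p>status: {status}</p>
    </div>
  );
};

export default Speaker;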
There's probably plenty of logic here that could be handled better, and this is my first time writing a share like this, so if you spot problems let's discuss.
