On iOS 15.1 it picks up sound, but no text is produced.
In the same runtime environment, iOS 15.2 or 17+ has no problem.
The problem has also appeared on iOS 16.0. I traced it this morning: the data during real-time processing is basically all machine noise, yet when I listen to the final result, the audio file is fine.
Here is the code:
```javascript
// Required core import
import Recorder from "recorder-core";
// This import alone is enough for recording in wav format
import "recorder-core/src/engine/wav";
import "recorder-core/src/engine/pcm";
// Optional plugin: waveform visualization
import "recorder-core/src/extensions/waveview";

// Create the recorder object
recorderObj.value = Recorder({
  type: "wav", // recording format; can be wav, mp3, etc.
  sampleRate: 16000, // sample rate; higher captures more detail
  bitRate: 16, // bit rate; higher means better quality
  onProcess: (
    buffers,
    powerLevel,
    bufferDuration,
    bufferSampleRate,
    newBufferIdx,
    asyncEnd
  ) => {
    // Real-time recording callback, invoked roughly 12 times per second
    // Upload (send) data in real time
    handleProcess(buffers, bufferSampleRate, false);
    // Draw the waveform in real time
    if (wave.value)
      wave.value.input(
        buffers[buffers.length - 1],
        powerLevel,
        bufferSampleRate
      );
  },
});

// Open the recorder and request permission
recorderObj.value.open(
  async () => {
    console.log("Recorder opened");
    if (waveRef.value) {
      // Create the waveform visualization object
      wave.value = Recorder.WaveView({ elem: waveRef.value });
    }
    await openWebSocket();
  },
  (msg, isUserNotAllow) => {
    // The user denied recording permission, or the browser does not support recording
    if (isUserNotAllow) {
      Toast({
        message: "Please enable recording permission",
        duration: 2000,
      });
    }
    console.log((isUserNotAllow ? "UserNotAllow," : "") + "Cannot record: " + msg);
  }
);

const handleProcess = (buffers, bufferSampleRate, isClose) => {
  const t1 = Date.now();
  if (lastTimeSendTime === 0) {
    // First entry: initialize state
    lastTimeSendTime = t1;
    realTimeSendTryEncBusy = 0;
    realTimeSendTryChunk = null;
  }
  if (!isClose && t1 - lastTimeSendTime < SendInterval) {
    return; // Buffer until the configured interval has elapsed before transmitting
  }
  lastTimeSendTime = t1;
  let pcm = [];
  if (buffers.length > 0) {
    // Use SampleData for continuous processing; sample-rate conversion comes along for free, yielding new pcm data
    const chunk = Recorder.SampleData(
      buffers,
      bufferSampleRate,
      testSampleRate,
      realTimeSendTryChunk,
      { frameType: isClose ? "" : realTimeSendTryType }
    );
    // Do not clear the data here, because it ultimately gets saved to the backend
    realTimeSendTryChunk = chunk;
    // chunk.data is raw 16-bit PCM (little-endian LE): save it directly for a 16-bit pcm file, add a wav header for a wav file, or run it through an mp3 encoder for an mp3 file
    pcm = chunk.data;
  }
  // No new data, or too little data at close time for mock transcoding
  if (pcm.length === 0 || (isClose && pcm.length < 2000)) {
    sendData(null);
    return;
  }
  // Handle real-time encoding queue congestion
  if (!isClose) {
    if (realTimeSendTryEncBusy >= 2) {
      console.log("Encoding queue congested, dropped one frame", 1);
      return;
    }
  }
  // Queue ++
  realTimeSendTryEncBusy++;
  // Transcode to mp3/wav in real time via the mock method; 16-bit pcm can skip this and be sent directly as new Blob([pcm.buffer],{type:"audio/pcm"}); 8-bit requires transcoding
  sendData(new Blob([pcm.buffer], { type: "audio/pcm" }));
  // Decrement the encoding queue count so the next frame can come in
  realTimeSendTryEncBusy && realTimeSendTryEncBusy--;
};

const openWebSocket = () => {
  recordingStatus.value = "Connecting to Alibaba Cloud";
  const TOKEN = tokenInfo.value.token;
  const URL = `wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1?token=${TOKEN}`;
  // Connect to the Alibaba Cloud server
  websocket.value = new WebSocket(URL);
  // Connection-established event
  websocket.value.onopen = () => {
    console.log("websocket connected");
    const instruction = {
      header: {
        appkey: tokenInfo.value.appKey,
        message_id: generateUniqueID(),
        task_id: generateUniqueID(),
        namespace: "SpeechTranscriber",
        name: "StartTranscription",
      },
      payload: {
        format: "PCM",
        sample_rate: 16000,
        enable_intermediate_result: true,
        enable_punctuation_prediction: true,
        enable_inverse_text_normalization: true,
        max_sentence_silence: 1000,
        speech_noise_threshold: 0.9,
      },
    };
    // After connecting, send the start-task instruction; docs: https://help.aliyun.com/document_detail/324262.html?spm=a2c4g.84428.0.0.756c589dV4l84r
    websocket.value.send(JSON.stringify(instruction));
  };
  // Handle incoming messages
  websocket.value.onmessage = (e) => {
    const ret = JSON.parse(e.data);
    console.log("websocket", ret);
    // For the meaning of each event, see the official docs: https://help.aliyun.com/document_detail/324262.html?spm=a2c4g.84428.0.0.756c589dV4l84r
    if (ret.header.name === "TranscriptionResultChanged") {
      // The recognition result changed, typically during dynamic correction
      console.log(ret.payload.result, "resultTranscriptionResultChanged---");
      onTextChanged(ret.payload.result);
    } else if (ret.header.name === "SentenceBegin") {
      // The server detected the start of a sentence; push an element onto the session log to split sentences apart
      console.log("SentenceBegin--------");
      recordings.value.push("");
    } else if (ret.header.name === "TranscriptionStarted") {
      // The server is ready to recognize and the client may send audio; turn on the microphone and start streaming
      recStart();
    } else if (ret.header.name === "SentenceEnd") {
      console.log("SentenceEnd--------");
      // The server detected the end of a sentence; process and display the data
      onTextFinished(ret.payload.result);
    } else if (ret.header.name === "TranscriptionCompleted") {
      // The server has stopped transcription; you can do other work here, e.g. close the websocket, turn off the microphone
      Toast({
        message: "The server has stopped transcription",
        forbidClick: true,
        duration: 1000,
      });
      console.log("The server has stopped transcription", ret);
    } else {
      console.log("response: ", e);
    }
  };
};
```
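To narrow down where the noise enters, one debugging option is to accumulate the exact `chunk.data` the real-time path sends, wrap it in a WAV header for local playback, and compare it with the file the recorder itself produces at stop. A minimal sketch, assuming a 16 kHz mono stream; `debugChunks` and `buildDebugWav` are hypothetical names, not recorder-core API:

```javascript
// Debugging sketch (hypothetical): collect the same PCM the real-time path sends,
// then prepend a WAV header so it can be played back and compared with the
// recorder's own final file. Nothing here is part of recorder-core.
const debugChunks = []; // push pcm (Int16Array) here inside handleProcess

const buildDebugWav = (chunks, sampleRate = 16000) => {
  const totalSamples = chunks.reduce((n, c) => n + c.length, 0);
  const dataSize = totalSamples * 2; // 16-bit mono
  const buf = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buf);
  const writeStr = (off, s) => {
    for (let i = 0; i < s.length; i++) view.setUint8(off + i, s.charCodeAt(i));
  };
  // Standard 44-byte PCM WAV header
  writeStr(0, "RIFF");
  view.setUint32(4, 36 + dataSize, true);
  writeStr(8, "WAVE");
  writeStr(12, "fmt ");
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // PCM format
  view.setUint16(22, 1, true); // mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * 2, true); // byte rate
  view.setUint16(32, 2, true); // block align
  view.setUint16(34, 16, true); // bits per sample
  writeStr(36, "data");
  view.setUint32(40, dataSize, true);
  // Append the samples, little-endian, in send order
  let off = 44;
  for (const c of chunks) {
    for (let i = 0; i < c.length; i++, off += 2) view.setInt16(off, c[i], true);
  }
  return new Blob([buf], { type: "audio/wav" });
};
```

If this file is noisy while the recorder's final file plays back clean, the corruption is happening in the real-time path rather than at capture, which would match the behavior described above.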
If the data in real-time processing were abnormal, the final recording file should also be abnormal. Your case is odd: the final file turns out fine. Not sure where the problem is 😂
On the phones that have the problem, does it happen every time, or are the first one or two attempts normal and it only goes wrong afterwards?
On one person's phone the first few sentences were fine, but later ones went bad and sounded full of noise. iOS 17 and above doesn't have this problem; 15.1 and 15.0 both do, while 15.2 is fine. It's really strange. At first I thought Alibaba Cloud wasn't returning data, but then I captured the data I was uploading and found it was all noise.
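One way to pin down exactly when the stream flips from speech to noise, without waiting on the server, is to log a rough per-chunk signal statistic right before uploading. A minimal sketch; `logChunkStats` is a hypothetical helper operating on the `Int16Array` returned by `Recorder.SampleData`:

```javascript
// Debugging sketch (hypothetical): log per-chunk RMS so it is obvious in the
// console when the stream flips from speech to machine noise.
const logChunkStats = (pcm) => {
  let sumSq = 0;
  for (let i = 0; i < pcm.length; i++) sumSq += pcm[i] * pcm[i];
  const rms = Math.sqrt(sumSq / pcm.length);
  console.log("chunk samples:", pcm.length, "rms:", rms.toFixed(1));
};
```

Speech should show the RMS rising and falling with the voice; a sudden jump to a constant high value would mark the point where the "later sentences are all noise" behavior begins.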
Sometimes refreshing the page fixes it. After stop I added a call to Recorder.Destroy, and that seems to resolve some of the problems. This project plays audio before entering the recording flow, so maybe the playback audio isn't being released cleanly? Later I upgraded a test device to 17.6 and could no longer reproduce the problem 😂
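Building on the cleanup idea above, a hedged teardown sketch: stop, close, then destroy globally, and release any media elements that were playing before recording. `stopRecording` and the `<audio>` handling are assumptions about this app's surrounding code; `rec.close()` and `Recorder.Destroy()` are the recorder-core calls referred to in this thread:

```javascript
// Teardown sketch: stop the recording, release the microphone, then tear down
// recorder-core's shared audio context. stopRecording is a hypothetical name.
const stopRecording = () => {
  recorderObj.value.stop(
    (blob, duration) => {
      console.log("recording finished", duration, blob);
      recorderObj.value.close(); // release the microphone
      Recorder.Destroy(); // tear down recorder-core's shared AudioContext
    },
    (msg) => {
      console.log("stop failed: " + msg);
      recorderObj.value.close();
      Recorder.Destroy();
    }
  );
};

// Before opening the recorder, make sure earlier playback is fully released,
// per the hypothesis above that a live audio session confuses iOS capture.
document.querySelectorAll("audio").forEach((el) => {
  el.pause();
  el.src = ""; // drop the media resource so iOS releases the audio session
  el.load();
});
```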