diff --git a/src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp b/src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp index d0cb10b..fc4512d 100644 --- a/src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp +++ b/src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp @@ -4,47 +4,22 @@ using namespace iflytop; using namespace core; #define MIC_NUM 4 -#define FRAME_LEN 128 // 必须是128,不支持其他 +#define CH_NUM (MIC_NUM + 1) +#define FRAME_NUM 128 // 必须是128,不支持其他 float s_coord_data[18] = {-0.060, 0, 0, // -0.020, 0, 0, // +0.020, 0, 0, // +0.060, 0, 0, // 0, 0, 0, // 0, 0, 0}; - -#if 0 -float *audio_aec_init(int mic_num); -int audio_aec_process(float *obj, short *mic_data, short *ref_data, short *aec_out, int *aec_state, int *aec_farend); -void audio_aec_uninit(float *obj); - -//====================doa================================================= -float *audio_doa_init(int mic_num, float *coord_data, float location_range1, float location_range2); -int audio_doa_process(float *obj, short *in_buff ,int aec_state, int aec_farend, float *doa1, float *doa2, float *doa3, int *vad_stat); -void audio_doa_uninit(float *audio_doa_obj); - -//====================gsc================================================= -float *audio_gsc_init(int mic_num, float *coord_data); -int audio_gsc_amb(float *obj, short *in_buff, int aec_state, int aec_farend, float dest_doa , float *location_obj, short *out_data, int *vad_stat); -int audio_gsc_fixed(float *obj, short *in_buff ,int aec_state, int aec_farend, float dest_doa, float int_doa, short *out_data,int *vad_stat); -void audio_gsc_uninit( float *audio_gsc_obj ); - -//====================ns================================================= -float *audio_ns_init(int mode); -int audio_ns_process(float *audio_ns_obj, short *in_data, short *out_data ,int aec_stat); -void audio_ns_uninit(float *audio_ns_obj); - -//====================agc================================================= -float *audio_agc_init(int frame_len, int mode, float arg_val); -int audio_agc_process(float *audio_agc_obj, short *in_data, short *out_data, int vad_stat, int aec_stat); -void audio_agc_uninit(float *audio_agc_obj); -#endif +#define ENABLE_AEC 1 void SoundBox4MicVoiceProcesser::initialize() { // - m_common_aec = audio_aec_init(4); + m_common_aec = audio_aec_init(MIC_NUM); - m_wakeup_ns = audio_ns_init(1); - m_wakeuop_agc = audio_agc_init(128, 1, 24000.0); + m_wakeup_ns = audio_ns_init(1); + m_wakeup_agc = audio_agc_init(128, 1, 24000.0); // | 40.00mm | 40.00mm | 40.00mm | // 60 20 0 20 60 @@ -58,79 +33,194 @@ void SoundBox4MicVoiceProcesser::initialize() { while (!thisThread.getExitFlag()) { shared_ptr audioClip; if (m_commonVoiceProcessQ.try_dequeue(audioClip)) { + processCommonVoiceInter(audioClip); + } + thisThread.sleepForMs(5); + } + })); + + m_bfAsrVoiceProcessThread.reset(new Thread("bfAsrVoiceProcessThread", [this]() { + ThisThread thisThread; + while (!thisThread.getExitFlag()) { + shared_ptr audioClip; + if (m_bfAsrVoiceProcessQ.try_dequeue(audioClip)) { + processBfAsrVoiceInter(audioClip); + } + thisThread.sleepForMs(5); + } + })); + + m_bfWakeupVoiceProcessThread.reset(new Thread("bfWakeupVoiceProcessThread", [this]() { + ThisThread thisThread; + while (!thisThread.getExitFlag()) { + shared_ptr audioClip; + if (m_bfWakeupVoiceProcessQ.try_dequeue(audioClip)) { + processBfWakeupVoiceInter(audioClip); } thisThread.sleepForMs(5); } })); + // onAfterBfWakeupVoiceProcess } void SoundBox4MicVoiceProcesser::commonVoiceProcess(shared_ptr audioClip) { ZCHECK(audioClip != nullptr, "audioClip is null"); - ZCHECK(audioClip->getCh() == 5, "audioClip ch is not 5"); + ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5"); ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000"); ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16"); - // shared_ptr audioContext = make_shared(); - // audioClip->setContext("audioContext", audioContext); - m_commonVoiceProcessQ.enqueue(audioClip); } // 处理语音送给ASR void SoundBox4MicVoiceProcesser::bfAsrVoiceProcess(shared_ptr audioClip) { - shared_ptr audioContext = audioClip->getContext("audioContext"); - if (audioContext == nullptr) { - return; - } + ZCHECK(audioClip != nullptr, "audioClip is null"); + ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5"); + ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000"); + ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16"); + + m_bfAsrVoiceProcessQ.enqueue(audioClip); } -// 通用处理语音 -void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcess(shared_ptr audioClip) {} - -void SoundBox4MicVoiceProcesser::commonVoiceProcessThreadFunc(shared_ptr audioClip) { - ThisThread thisThread; - while (!thisThread.getExitFlag()) { - shared_ptr audioClip; - if (m_commonVoiceProcessQ.try_dequeue(audioClip)) { - if (audioClip) { - processCommonVoiceInter(audioClip); - } +void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcess(shared_ptr audioClip) { + ZCHECK(audioClip != nullptr, "audioClip is null"); + ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5"); + ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000"); + ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16"); + + m_bfWakeupVoiceProcessQ.enqueue(audioClip); +} + +shared_ptr SoundBox4MicVoiceProcesser::processVoice(shared_ptr audioClip, int outputchNum, + voicealgo_t voicealgo) { + vector outputbuf; + if (outputchNum == 1) { + outputbuf.resize(audioClip->size() / CH_NUM); + } else if (outputchNum == CH_NUM) { + outputbuf.resize(audioClip->size()); + } else { + ZCHECK(false, "outputchNum is not 1 or 5"); + } + + // get context + shared_ptr oldcontext = audioClip->getContext("audioContext"); + shared_ptr newcontext = make_shared(); + + for (size_t i = 0; i < audioClip->size(); i++) { + uint8_t* data = audioClip->data() + i; + uint8_t* output = outputbuf.data() + i / CH_NUM; + size_t len = FRAME_NUM * CH_NUM * 2; + int frameIndex = i / len; + AudioContext::OneFrameContext oneFrameContext = {.aec_state = 1, .aec_farend = 1}; + if (oldcontext && (int)oldcontext->oneFrameContexts.size() > frameIndex) { + oneFrameContext = oldcontext->oneFrameContexts[frameIndex]; } - thisThread.sleepForMs(5); + voicealgo(oneFrameContext, data, output, len); + newcontext->oneFrameContexts.push_back(oneFrameContext); + i += len; } -} -void SoundBox4MicVoiceProcesser::bfAsrVoiceProcessThreadFunc(shared_ptr audioClip) {} -void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcessThreadFunc(shared_ptr audioClip) {} + shared_ptr outputClip = make_shared(outputbuf.data(), outputbuf.size(), outputchNum, + audioClip->getRate(), audioClip->getFormat()); + outputClip->updateTp(audioClip->getTp(), audioClip->getHumanReadableTp()); + outputClip->setContext("audioContext", newcontext); + return outputClip; +} void SoundBox4MicVoiceProcesser::processCommonVoiceInter(shared_ptr audioClip) { + shared_ptr outaudioClip = + processVoice(audioClip, CH_NUM /*output ch*/, // + [&](AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len) { + processCommonVoiceInter(context, data, output, len); + }); + onAfterCommonVoiceProcess(outaudioClip); +} +// 通用处理语音 +void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr audioClip) { + shared_ptr outaudioClip = + processVoice(audioClip, 1 /*output ch*/, // + [&](AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len) { + processBfWakeupVoiceInter(context, data, output, len); + }); + onAfterBfWakeupVoiceProcess(outaudioClip); +} + +void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr audioClip) { + shared_ptr outaudioClip = + processVoice(audioClip, 1 /*output ch*/, // + [&](AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len) { + processBfAsrVoiceInter(context, data, output, len); + }); + onAfterBfAsrVoiceProcess(outaudioClip); +} + +void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, + uint8_t* output, size_t len) { /** - * @brief 处理来自MIC的语音,主要是对语音进行AEC + * @brief + * 1. 5MIC to 1MIC + * 2. NS + * 3. AGC */ - vector outputbuf; - shared_ptr context = make_shared(); + // 1. 5MIC to 1MIC - outputbuf.resize(audioClip->size()); - for (size_t i = 0; i < audioClip->size();) { - uint8_t* data = audioClip->data() + i; - uint8_t* output = outputbuf.data() + i; - size_t len = FRAME_LEN * 5; - AudioContext::OneFrameAECContext oneFrameContext; + vector ch1voice; + vector nsoutvoice; + vector agcoutvoice; + int framenum = len / 2 / CH_NUM; - processCommonVoiceInter(oneFrameContext, data, output, len); - context->oneFrameContexts.push_back(oneFrameContext); - i += len; + ch1voice.resize(framenum); + nsoutvoice.resize(framenum); + agcoutvoice.resize(framenum); + + int16_t* indata16 = (int16_t*)data; + + for (int frameoff = 0; frameoff < framenum; frameoff++) { + ch1voice[frameoff] = indata16[frameoff * CH_NUM]; } + // 2. NS + audio_ns_process(m_wakeup_ns, ch1voice.data(), nsoutvoice.data(), context.aec_state); - shared_ptr outputClip = make_shared(outputbuf.data(), outputbuf.size(), audioClip->getCh(), - audioClip->getRate(), audioClip->getFormat()); - outputClip->updateTp(audioClip->getTp(), audioClip->getHumanReadableTp()); - outputClip->setContext("audioContext", context); + // 3. AGC + audio_agc_process(m_wakeup_agc, nsoutvoice.data(), agcoutvoice.data(), 1, context.aec_farend); - onAfterCommonVoiceProcess(outputClip); + // 4 . copy to output + memcpy(output, agcoutvoice.data(), framenum * 2); } -void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr audioClip) {} -void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr audioClip) {} -void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameAECContext& aec, uint8_t* data, +void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, + uint8_t* output, size_t len) { + /** + * @brief + * 1. GSC 5MIC to 1MIC + * 2. NS + * 3. AGC + */ + + // 1. 5MIC to 1MIC + + vector ch1voice; + vector nsoutvoice; + vector agcoutvoice; + int framenum = len / 2 / CH_NUM; + + ch1voice.resize(framenum); + nsoutvoice.resize(framenum); + agcoutvoice.resize(framenum); + + int16_t* indata16 = (int16_t*)data; + audio_gsc_fixed(m_asr_gsc, indata16, context.aec_state, context.aec_farend, 0.0, 180.0, ch1voice.data(), + &context.vad_state); + + // 2. NS + audio_ns_process(m_wakeup_ns, ch1voice.data(), nsoutvoice.data(), context.aec_state); + + // 3. AGC + audio_agc_process(m_wakeup_agc, nsoutvoice.data(), agcoutvoice.data(), 1, context.aec_farend); + + // 4 . copy to output + memcpy(output, agcoutvoice.data(), framenum * 2); +} + +void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len) { /** * @brief @@ -148,37 +238,36 @@ void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameA * */ - ZCHECK(len == FRAME_LEN * 5, "len is not FRAME_LEN * 5"); + ZCHECK(len == FRAME_NUM * CH_NUM * 2, "len is not FRAME_NUM * CH_NUM"); + if (!m_config.enableAec) { + memcpy(output, data, len); + context.aec_farend = 1; + context.aec_state = 1; + } - int eachframelen = 2 * 5; // 5mic*2byte + int eachframelen = 2 * CH_NUM; // 5mic*2byte int frameNum = len / eachframelen; vector voice; vector ref; vector aecvoice; vector outputvoice; - voice.resize(len / 5 * 4); - aecvoice.resize(len / 5 * 4); - ref.resize(len / 5); + voice.resize(len / CH_NUM * MIC_NUM); + aecvoice.resize(len / CH_NUM * MIC_NUM); + ref.resize(len / CH_NUM); outputvoice.resize(len); - for (size_t i = 0; i < frameNum; frameNum++) { - memcpy(voice.data() + i * 4 * 2 /*4 channel 2byte*/, data + i * eachframelen, 4 * 2); - memcpy(ref.data() + i * 2 /*1 channel 2byte*/, data + i * eachframelen + 4 * 2, 2); + for (int i = 0; i < frameNum; frameNum++) { + memcpy(voice.data() + i * MIC_NUM * 2 /*4 channel 2byte*/, data + i * eachframelen, MIC_NUM * 2); + memcpy(ref.data() + i * 2 /*1 channel 2byte*/, data + i * eachframelen + MIC_NUM * 2, 2); } - int aec_state = 0; - int aec_farend = 0; - audio_aec_process(m_common_aec, voice.data(), ref.data(), aecvoice.data(), &aec.aec_state, &aec.aec_farend); + audio_aec_process(m_common_aec, voice.data(), ref.data(), aecvoice.data(), &context.aec_state, &context.aec_farend); - for (size_t i = 0; i < frameNum; frameNum++) { - memcpy(outputvoice.data() + i * eachframelen, /* */ aecvoice.data() + i * 4 * 2, 4 * 2); - memcpy(outputvoice.data() + i * eachframelen + 4 * 2, ref.data() + i * 2 /* */, 2); + for (int i = 0; i < frameNum; frameNum++) { + memcpy(outputvoice.data() + i * eachframelen, /* */ aecvoice.data() + i * MIC_NUM * 2, MIC_NUM * 2); + memcpy(outputvoice.data() + i * eachframelen + MIC_NUM * 2, ref.data() + i * 2 /* */, 2); } memcpy(output, outputvoice.data(), outputvoice.size()); } -void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr audioContext, uint8_t* data, - size_t len) {} -void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr audioContext, uint8_t* data, - size_t len) {} \ No newline at end of file diff --git a/src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp b/src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp index ba110f9..59091c7 100644 --- a/src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp +++ b/src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp @@ -41,21 +41,29 @@ using namespace moodycamel; class SoundBox4MicVoiceProcesser : public enable_shared_from_this { ENABLE_LOGGER(SoundBox4MicVoiceProcesser); + public: class AudioContext { public: - struct OneFrameAECContext { + struct OneFrameContext { public: - int aec_state; - int aec_farend; + int aec_state = 1; + int aec_farend = 1; + int vad_state = 1; }; - vector oneFrameContexts; + vector oneFrameContexts; }; typedef float iflytop_voice_hander_t; + class Config { + public: + bool enableAec = true; + }; + + private: iflytop_voice_hander_t* m_common_aec = nullptr; - iflytop_voice_hander_t* m_wakeup_ns = nullptr; - iflytop_voice_hander_t* m_wakeuop_agc = nullptr; + iflytop_voice_hander_t* m_wakeup_ns = nullptr; + iflytop_voice_hander_t* m_wakeup_agc = nullptr; iflytop_voice_hander_t* m_asr_ns = nullptr; iflytop_voice_hander_t* m_asr_agc = nullptr; @@ -69,6 +77,11 @@ class SoundBox4MicVoiceProcesser : public enable_shared_from_this m_bfAsrVoiceProcessThread; unique_ptr m_bfWakeupVoiceProcessThread; + Config m_config; + + typedef function + voicealgo_t; + public: SoundBox4MicVoiceProcesser(){}; void initialize(); @@ -93,19 +106,15 @@ class SoundBox4MicVoiceProcesser : public enable_shared_from_this audioClip)> onAfterBfAsrVoiceProcess; nod::signal audioClip)> onAfterBfWakeupVoiceProcess; - private: - void commonVoiceProcessThreadFunc(shared_ptr audioClip); - void bfAsrVoiceProcessThreadFunc(shared_ptr audioClip); - void bfWakeupVoiceProcessThreadFunc(shared_ptr audioClip); - void processCommonVoiceInter(shared_ptr audioClip); void processBfAsrVoiceInter(shared_ptr audioClip); void processBfWakeupVoiceInter(shared_ptr audioClip); - void processCommonVoiceInter(AudioContext::OneFrameAECContext& aec, uint8_t* data, uint8_t* output, - size_t len); - void processBfAsrVoiceInter(shared_ptr audioContext, uint8_t* data, size_t len); - void processBfWakeupVoiceInter(shared_ptr audioContext, uint8_t* data, size_t len); + shared_ptr processVoice(shared_ptr audioClip, int outputchNum, voicealgo_t voicealgo); + + void processCommonVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len); + void processBfAsrVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len); + void processBfWakeupVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len); }; } // namespace iflytop