Browse Source

add soundbox 4mic voice processer

master
zhaohe 2 years ago
parent
commit
830f171c08
  1. 267
      src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp
  2. 37
      src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp

267
src/iflytopvoicecpp/soundbox4mic_voice_processer.cpp

@ -4,47 +4,22 @@ using namespace iflytop;
using namespace core; using namespace core;
#define MIC_NUM 4 #define MIC_NUM 4
#define FRAME_LEN 128 // Must be 128; other values are not supported
#define CH_NUM (MIC_NUM + 1)
#define FRAME_NUM 128 // Must be 128; other values are not supported
float s_coord_data[18] = {-0.060, 0, 0, // float s_coord_data[18] = {-0.060, 0, 0, //
-0.020, 0, 0, // -0.020, 0, 0, //
+0.020, 0, 0, // +0.020, 0, 0, //
+0.060, 0, 0, // +0.060, 0, 0, //
0, 0, 0, // 0, 0, 0, //
0, 0, 0}; 0, 0, 0};
// Compiled out by `#if 0`: declarations of the AEC, DOA, GSC, NS and AGC init/process/uninit entry points.
#if 0
float *audio_aec_init(int mic_num);
int audio_aec_process(float *obj, short *mic_data, short *ref_data, short *aec_out, int *aec_state, int *aec_farend);
void audio_aec_uninit(float *obj);
//====================doa=================================================
float *audio_doa_init(int mic_num, float *coord_data, float location_range1, float location_range2);
int audio_doa_process(float *obj, short *in_buff ,int aec_state, int aec_farend, float *doa1, float *doa2, float *doa3, int *vad_stat);
void audio_doa_uninit(float *audio_doa_obj);
//====================gsc=================================================
float *audio_gsc_init(int mic_num, float *coord_data);
int audio_gsc_amb(float *obj, short *in_buff, int aec_state, int aec_farend, float dest_doa , float *location_obj, short *out_data, int *vad_stat);
int audio_gsc_fixed(float *obj, short *in_buff ,int aec_state, int aec_farend, float dest_doa, float int_doa, short *out_data,int *vad_stat);
void audio_gsc_uninit( float *audio_gsc_obj );
//====================ns=================================================
float *audio_ns_init(int mode);
int audio_ns_process(float *audio_ns_obj, short *in_data, short *out_data ,int aec_stat);
void audio_ns_uninit(float *audio_ns_obj);
//====================agc=================================================
float *audio_agc_init(int frame_len, int mode, float arg_val);
int audio_agc_process(float *audio_agc_obj, short *in_data, short *out_data, int vad_stat, int aec_stat);
void audio_agc_uninit(float *audio_agc_obj);
#endif
#define ENABLE_AEC 1
void SoundBox4MicVoiceProcesser::initialize() { void SoundBox4MicVoiceProcesser::initialize() {
// //
m_common_aec = audio_aec_init(4);
m_common_aec = audio_aec_init(MIC_NUM);
m_wakeup_ns = audio_ns_init(1); m_wakeup_ns = audio_ns_init(1);
m_wakeuop_agc = audio_agc_init(128, 1, 24000.0);
m_wakeup_agc = audio_agc_init(128, 1, 24000.0);
// | 40.00mm | 40.00mm | 40.00mm | // | 40.00mm | 40.00mm | 40.00mm |
// 60 20 0 20 60 // 60 20 0 20 60
@ -58,79 +33,194 @@ void SoundBox4MicVoiceProcesser::initialize() {
while (!thisThread.getExitFlag()) { while (!thisThread.getExitFlag()) {
shared_ptr<AudioClip> audioClip; shared_ptr<AudioClip> audioClip;
if (m_commonVoiceProcessQ.try_dequeue(audioClip)) { if (m_commonVoiceProcessQ.try_dequeue(audioClip)) {
processCommonVoiceInter(audioClip);
}
thisThread.sleepForMs(5);
}
}));
m_bfAsrVoiceProcessThread.reset(new Thread("bfAsrVoiceProcessThread", [this]() {
ThisThread thisThread;
while (!thisThread.getExitFlag()) {
shared_ptr<AudioClip> audioClip;
if (m_bfAsrVoiceProcessQ.try_dequeue(audioClip)) {
processBfAsrVoiceInter(audioClip);
} }
thisThread.sleepForMs(5); thisThread.sleepForMs(5);
} }
})); }));
m_bfWakeupVoiceProcessThread.reset(new Thread("bfWakeupVoiceProcessThread", [this]() {
ThisThread thisThread;
while (!thisThread.getExitFlag()) {
shared_ptr<AudioClip> audioClip;
if (m_bfWakeupVoiceProcessQ.try_dequeue(audioClip)) {
processBfWakeupVoiceInter(audioClip);
}
thisThread.sleepForMs(5);
}
}));
// onAfterBfWakeupVoiceProcess
} }
void SoundBox4MicVoiceProcesser::commonVoiceProcess(shared_ptr<AudioClip> audioClip) { void SoundBox4MicVoiceProcesser::commonVoiceProcess(shared_ptr<AudioClip> audioClip) {
ZCHECK(audioClip != nullptr, "audioClip is null"); ZCHECK(audioClip != nullptr, "audioClip is null");
ZCHECK(audioClip->getCh() == 5, "audioClip ch is not 5");
ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5");
ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000"); ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000");
ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16"); ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16");
// shared_ptr<AudioContext> audioContext = make_shared<AudioContext>();
// audioClip->setContext("audioContext", audioContext);
m_commonVoiceProcessQ.enqueue(audioClip); m_commonVoiceProcessQ.enqueue(audioClip);
} }
// Process voice and send it to ASR // Process voice and send it to ASR
void SoundBox4MicVoiceProcesser::bfAsrVoiceProcess(shared_ptr<AudioClip> audioClip) { void SoundBox4MicVoiceProcesser::bfAsrVoiceProcess(shared_ptr<AudioClip> audioClip) {
shared_ptr<AudioContext> audioContext = audioClip->getContext<AudioContext>("audioContext");
if (audioContext == nullptr) {
return;
}
ZCHECK(audioClip != nullptr, "audioClip is null");
ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5");
ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000");
ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16");
m_bfAsrVoiceProcessQ.enqueue(audioClip);
} }
// General voice processing
void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcess(shared_ptr<AudioClip> audioClip) {}
void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcess(shared_ptr<AudioClip> audioClip) {
ZCHECK(audioClip != nullptr, "audioClip is null");
ZCHECK(audioClip->getCh() == CH_NUM, "audioClip ch is not 5");
ZCHECK(audioClip->getRate() == 16000, "audioClip sampleRate is not 16000");
ZCHECK(audioClip->getBitsPerSample() == 16, "audioClip bitsPerSample is not 16");
void SoundBox4MicVoiceProcesser::commonVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip) {
ThisThread thisThread;
while (!thisThread.getExitFlag()) {
shared_ptr<AudioClip> audioClip;
if (m_commonVoiceProcessQ.try_dequeue(audioClip)) {
if (audioClip) {
processCommonVoiceInter(audioClip);
m_bfWakeupVoiceProcessQ.enqueue(audioClip);
}
shared_ptr<AudioClip> SoundBox4MicVoiceProcesser::processVoice(shared_ptr<AudioClip> audioClip, int outputchNum,
voicealgo_t voicealgo) {
vector<uint8_t> outputbuf;
if (outputchNum == 1) {
outputbuf.resize(audioClip->size() / CH_NUM);
} else if (outputchNum == CH_NUM) {
outputbuf.resize(audioClip->size());
} else {
ZCHECK(false, "outputchNum is not 1 or 5");
} }
// get context
shared_ptr<AudioContext> oldcontext = audioClip->getContext<AudioContext>("audioContext");
shared_ptr<AudioContext> newcontext = make_shared<AudioContext>();
for (size_t i = 0; i < audioClip->size(); i++) {
uint8_t* data = audioClip->data() + i;
uint8_t* output = outputbuf.data() + i / CH_NUM;
size_t len = FRAME_NUM * CH_NUM * 2;
int frameIndex = i / len;
AudioContext::OneFrameContext oneFrameContext = {.aec_state = 1, .aec_farend = 1};
if (oldcontext && (int)oldcontext->oneFrameContexts.size() > frameIndex) {
oneFrameContext = oldcontext->oneFrameContexts[frameIndex];
} }
thisThread.sleepForMs(5);
voicealgo(oneFrameContext, data, output, len);
newcontext->oneFrameContexts.push_back(oneFrameContext);
i += len;
} }
}
void SoundBox4MicVoiceProcesser::bfAsrVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip) {}
void SoundBox4MicVoiceProcesser::bfWakeupVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip) {}
shared_ptr<AudioClip> outputClip = make_shared<AudioClip>(outputbuf.data(), outputbuf.size(), outputchNum,
audioClip->getRate(), audioClip->getFormat());
outputClip->updateTp(audioClip->getTp(), audioClip->getHumanReadableTp());
outputClip->setContext("audioContext", newcontext);
return outputClip;
}
void SoundBox4MicVoiceProcesser::processCommonVoiceInter(shared_ptr<AudioClip> audioClip) { void SoundBox4MicVoiceProcesser::processCommonVoiceInter(shared_ptr<AudioClip> audioClip) {
shared_ptr<AudioClip> outaudioClip =
processVoice(audioClip, CH_NUM /*output ch*/, //
[&](AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len) {
processCommonVoiceInter(context, data, output, len);
});
onAfterCommonVoiceProcess(outaudioClip);
}
/**
 * @brief Clip-level entry of the wake-up beamforming path.
 *
 * Hands the clip to processVoice() with a single output channel, using the
 * per-frame processBfWakeupVoiceInter() overload as the algorithm, then emits
 * the resulting clip on onAfterBfWakeupVoiceProcess.
 *
 * @param audioClip Clip to process; forwarded to processVoice() unchanged.
 */
void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr<AudioClip> audioClip) {
  // Only `this` is needed inside the callback; it forwards one frame to the per-frame overload.
  auto perFrame = [this](AudioContext::OneFrameContext& frameCtx, uint8_t* in, uint8_t* out, size_t nbytes) {
    processBfWakeupVoiceInter(frameCtx, in, out, nbytes);
  };

  shared_ptr<AudioClip> processed = processVoice(audioClip, 1 /*output ch*/, perFrame);
  onAfterBfWakeupVoiceProcess(processed);
}
/**
 * @brief Clip-level entry of the ASR beamforming path.
 *
 * Hands the clip to processVoice() with a single output channel, using the
 * per-frame processBfAsrVoiceInter() overload as the algorithm, then emits
 * the resulting clip on onAfterBfAsrVoiceProcess.
 *
 * @param audioClip Clip to process; forwarded to processVoice() unchanged.
 */
void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr<AudioClip> audioClip) {
  // Only `this` is needed inside the callback; it forwards one frame to the per-frame overload.
  auto perFrame = [this](AudioContext::OneFrameContext& frameCtx, uint8_t* in, uint8_t* out, size_t nbytes) {
    processBfAsrVoiceInter(frameCtx, in, out, nbytes);
  };

  shared_ptr<AudioClip> processed = processVoice(audioClip, 1 /*output ch*/, perFrame);
  onAfterBfAsrVoiceProcess(processed);
}
void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data,
uint8_t* output, size_t len) {
/** /**
* @brief MIC的语音AEC
* @brief
* 1. 5MIC to 1MIC
* 2. NS
* 3. AGC
*/ */
vector<uint8_t> outputbuf;
shared_ptr<AudioContext> context = make_shared<AudioContext>();
// 1. 5MIC to 1MIC
outputbuf.resize(audioClip->size());
for (size_t i = 0; i < audioClip->size();) {
uint8_t* data = audioClip->data() + i;
uint8_t* output = outputbuf.data() + i;
size_t len = FRAME_LEN * 5;
AudioContext::OneFrameAECContext oneFrameContext;
vector<int16_t> ch1voice;
vector<int16_t> nsoutvoice;
vector<int16_t> agcoutvoice;
int framenum = len / 2 / CH_NUM;
processCommonVoiceInter(oneFrameContext, data, output, len);
context->oneFrameContexts.push_back(oneFrameContext);
i += len;
ch1voice.resize(framenum);
nsoutvoice.resize(framenum);
agcoutvoice.resize(framenum);
int16_t* indata16 = (int16_t*)data;
for (int frameoff = 0; frameoff < framenum; frameoff++) {
ch1voice[frameoff] = indata16[frameoff * CH_NUM];
} }
// 2. NS
audio_ns_process(m_wakeup_ns, ch1voice.data(), nsoutvoice.data(), context.aec_state);
shared_ptr<AudioClip> outputClip = make_shared<AudioClip>(outputbuf.data(), outputbuf.size(), audioClip->getCh(),
audioClip->getRate(), audioClip->getFormat());
outputClip->updateTp(audioClip->getTp(), audioClip->getHumanReadableTp());
outputClip->setContext("audioContext", context);
// 3. AGC
audio_agc_process(m_wakeup_agc, nsoutvoice.data(), agcoutvoice.data(), 1, context.aec_farend);
onAfterCommonVoiceProcess(outputClip);
// 4 . copy to output
memcpy(output, agcoutvoice.data(), framenum * 2);
} }
void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr<AudioClip> audioClip) {}
void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr<AudioClip> audioClip) {}
void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameAECContext& aec, uint8_t* data,
/**
 * @brief Per-frame ASR chain: GSC (multi-channel to one channel), then NS, then AGC.
 *
 * Steps, in order:
 *   1. audio_gsc_fixed   - produces one channel from the CH_NUM-channel input
 *   2. audio_ns_process  - noise suppression on that channel
 *   3. audio_agc_process - gain control on the NS result
 *   4. the AGC result is copied into @p output
 *
 * @param context Per-frame state. aec_state and aec_farend are read;
 *                vad_state is passed to audio_gsc_fixed by pointer.
 * @param data    Input bytes, reinterpreted as int16_t samples.
 * @param output  Destination buffer; receives (len / 2 / CH_NUM) * 2 bytes.
 * @param len     Size of @p data in bytes.
 *
 * NOTE(review): the NS and AGC stages use m_wakeup_ns / m_wakeup_agc even though
 * this is the ASR path and the class also declares m_asr_ns / m_asr_agc —
 * confirm which handles are intended.
 */
void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data,
                                                        uint8_t* output, size_t len) {
  // Samples per channel: byte count / 2 bytes per sample / channel count.
  int samplesPerCh = len / 2 / CH_NUM;

  vector<int16_t> beamformed(samplesPerCh);
  vector<int16_t> denoised(samplesPerCh);
  vector<int16_t> leveled(samplesPerCh);

  // 1. GSC: multi-channel input -> single channel
  int16_t* samplesIn = reinterpret_cast<int16_t*>(data);
  audio_gsc_fixed(m_asr_gsc, samplesIn, context.aec_state, context.aec_farend, 0.0, 180.0, beamformed.data(),
                  &context.vad_state);

  // 2. NS
  audio_ns_process(m_wakeup_ns, beamformed.data(), denoised.data(), context.aec_state);

  // 3. AGC
  audio_agc_process(m_wakeup_agc, denoised.data(), leveled.data(), 1, context.aec_farend);

  // 4. Copy the single-channel result (2 bytes per sample) to the caller's buffer
  memcpy(output, leveled.data(), samplesPerCh * 2);
}
void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data,
uint8_t* output, size_t len) { uint8_t* output, size_t len) {
/** /**
* @brief * @brief
@ -148,37 +238,36 @@ void SoundBox4MicVoiceProcesser::processCommonVoiceInter(AudioContext::OneFrameA
* *
*/ */
ZCHECK(len == FRAME_LEN * 5, "len is not FRAME_LEN * 5");
ZCHECK(len == FRAME_NUM * CH_NUM * 2, "len is not FRAME_NUM * CH_NUM");
if (!m_config.enableAec) {
memcpy(output, data, len);
context.aec_farend = 1;
context.aec_state = 1;
}
int eachframelen = 2 * 5; // 5mic*2byte
int eachframelen = 2 * CH_NUM; // 5mic*2byte
int frameNum = len / eachframelen; int frameNum = len / eachframelen;
vector<short> voice; vector<short> voice;
vector<short> ref; vector<short> ref;
vector<short> aecvoice; vector<short> aecvoice;
vector<short> outputvoice; vector<short> outputvoice;
voice.resize(len / 5 * 4);
aecvoice.resize(len / 5 * 4);
ref.resize(len / 5);
voice.resize(len / CH_NUM * MIC_NUM);
aecvoice.resize(len / CH_NUM * MIC_NUM);
ref.resize(len / CH_NUM);
outputvoice.resize(len); outputvoice.resize(len);
for (size_t i = 0; i < frameNum; frameNum++) {
memcpy(voice.data() + i * 4 * 2 /*4 channel 2byte*/, data + i * eachframelen, 4 * 2);
memcpy(ref.data() + i * 2 /*1 channel 2byte*/, data + i * eachframelen + 4 * 2, 2);
for (int i = 0; i < frameNum; frameNum++) {
memcpy(voice.data() + i * MIC_NUM * 2 /*4 channel 2byte*/, data + i * eachframelen, MIC_NUM * 2);
memcpy(ref.data() + i * 2 /*1 channel 2byte*/, data + i * eachframelen + MIC_NUM * 2, 2);
} }
int aec_state = 0;
int aec_farend = 0;
audio_aec_process(m_common_aec, voice.data(), ref.data(), aecvoice.data(), &aec.aec_state, &aec.aec_farend);
audio_aec_process(m_common_aec, voice.data(), ref.data(), aecvoice.data(), &context.aec_state, &context.aec_farend);
for (size_t i = 0; i < frameNum; frameNum++) {
memcpy(outputvoice.data() + i * eachframelen, /* */ aecvoice.data() + i * 4 * 2, 4 * 2);
memcpy(outputvoice.data() + i * eachframelen + 4 * 2, ref.data() + i * 2 /* */, 2);
for (int i = 0; i < frameNum; frameNum++) {
memcpy(outputvoice.data() + i * eachframelen, /* */ aecvoice.data() + i * MIC_NUM * 2, MIC_NUM * 2);
memcpy(outputvoice.data() + i * eachframelen + MIC_NUM * 2, ref.data() + i * 2 /* */, 2);
} }
memcpy(output, outputvoice.data(), outputvoice.size()); memcpy(output, outputvoice.data(), outputvoice.size());
} }
void SoundBox4MicVoiceProcesser::processBfAsrVoiceInter(shared_ptr<AudioContext> audioContext, uint8_t* data,
size_t len) {}
void SoundBox4MicVoiceProcesser::processBfWakeupVoiceInter(shared_ptr<AudioContext> audioContext, uint8_t* data,
size_t len) {}

37
src/iflytopvoicecpp/soundbox4mic_voice_processer.hpp

@ -41,21 +41,29 @@ using namespace moodycamel;
class SoundBox4MicVoiceProcesser : public enable_shared_from_this<SoundBox4MicVoiceProcesser> { class SoundBox4MicVoiceProcesser : public enable_shared_from_this<SoundBox4MicVoiceProcesser> {
ENABLE_LOGGER(SoundBox4MicVoiceProcesser); ENABLE_LOGGER(SoundBox4MicVoiceProcesser);
public:
class AudioContext { class AudioContext {
public: public:
struct OneFrameAECContext {
struct OneFrameContext {
public: public:
int aec_state;
int aec_farend;
int aec_state = 1;
int aec_farend = 1;
int vad_state = 1;
}; };
vector<OneFrameAECContext> oneFrameContexts;
vector<OneFrameContext> oneFrameContexts;
}; };
typedef float iflytop_voice_hander_t; typedef float iflytop_voice_hander_t;
class Config {
public:
bool enableAec = true;
};
private:
iflytop_voice_hander_t* m_common_aec = nullptr; iflytop_voice_hander_t* m_common_aec = nullptr;
iflytop_voice_hander_t* m_wakeup_ns = nullptr; iflytop_voice_hander_t* m_wakeup_ns = nullptr;
iflytop_voice_hander_t* m_wakeuop_agc = nullptr;
iflytop_voice_hander_t* m_wakeup_agc = nullptr;
iflytop_voice_hander_t* m_asr_ns = nullptr; iflytop_voice_hander_t* m_asr_ns = nullptr;
iflytop_voice_hander_t* m_asr_agc = nullptr; iflytop_voice_hander_t* m_asr_agc = nullptr;
@ -69,6 +77,11 @@ class SoundBox4MicVoiceProcesser : public enable_shared_from_this<SoundBox4MicVo
unique_ptr<Thread> m_bfAsrVoiceProcessThread; unique_ptr<Thread> m_bfAsrVoiceProcessThread;
unique_ptr<Thread> m_bfWakeupVoiceProcessThread; unique_ptr<Thread> m_bfWakeupVoiceProcessThread;
Config m_config;
typedef function<void(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len)>
voicealgo_t;
public: public:
SoundBox4MicVoiceProcesser(){}; SoundBox4MicVoiceProcesser(){};
void initialize(); void initialize();
@ -93,19 +106,15 @@ class SoundBox4MicVoiceProcesser : public enable_shared_from_this<SoundBox4MicVo
nod::signal<void(shared_ptr<AudioClip> audioClip)> onAfterBfAsrVoiceProcess; nod::signal<void(shared_ptr<AudioClip> audioClip)> onAfterBfAsrVoiceProcess;
nod::signal<void(shared_ptr<AudioClip> audioClip)> onAfterBfWakeupVoiceProcess; nod::signal<void(shared_ptr<AudioClip> audioClip)> onAfterBfWakeupVoiceProcess;
private: private:
void commonVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip);
void bfAsrVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip);
void bfWakeupVoiceProcessThreadFunc(shared_ptr<AudioClip> audioClip);
void processCommonVoiceInter(shared_ptr<AudioClip> audioClip); void processCommonVoiceInter(shared_ptr<AudioClip> audioClip);
void processBfAsrVoiceInter(shared_ptr<AudioClip> audioClip); void processBfAsrVoiceInter(shared_ptr<AudioClip> audioClip);
void processBfWakeupVoiceInter(shared_ptr<AudioClip> audioClip); void processBfWakeupVoiceInter(shared_ptr<AudioClip> audioClip);
void processCommonVoiceInter(AudioContext::OneFrameAECContext& aec, uint8_t* data, uint8_t* output,
size_t len);
void processBfAsrVoiceInter(shared_ptr<AudioContext> audioContext, uint8_t* data, size_t len);
void processBfWakeupVoiceInter(shared_ptr<AudioContext> audioContext, uint8_t* data, size_t len);
shared_ptr<AudioClip> processVoice(shared_ptr<AudioClip> audioClip, int outputchNum, voicealgo_t voicealgo);
void processCommonVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len);
void processBfAsrVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len);
void processBfWakeupVoiceInter(AudioContext::OneFrameContext& context, uint8_t* data, uint8_t* output, size_t len);
}; };
} // namespace iflytop } // namespace iflytop
Loading…
Cancel
Save