From 13b8ab65990e66f6bfed8437f3a306d4b95ed4cd Mon Sep 17 00:00:00 2001 From: zhaohe Date: Fri, 17 Mar 2023 11:51:30 +0800 Subject: [PATCH] update --- dep/zlinuxcomponents | 2 +- src/service/conversation_session.hpp | 23 ++++++- src/service/main_control_service.cpp | 30 ++++++++- src/service/main_control_service.hpp | 10 ++- src/service/voiceprocess/audio_logging_service.cpp | 74 ++++++++++++++++------ src/service/voiceprocess/audio_logging_service.hpp | 18 ++++-- .../voiceprocess/beforeasr_voiceprocesser.cpp | 13 ++++ .../voiceprocess/beforeasr_voiceprocesser.hpp | 10 ++- src/service/voiceprocess/wakeup_processer.cpp | 13 +++- src/service/voiceprocess/wakeup_processer.hpp | 2 +- 10 files changed, 160 insertions(+), 35 deletions(-) diff --git a/dep/zlinuxcomponents b/dep/zlinuxcomponents index f8ebf83..c771fa7 160000 --- a/dep/zlinuxcomponents +++ b/dep/zlinuxcomponents @@ -1 +1 @@ -Subproject commit f8ebf838a0aa7fdbe7915cc620875d4031c0b77a +Subproject commit c771fa7af061fd5d672d37a703a536e27f2d6522 diff --git a/src/service/conversation_session.hpp b/src/service/conversation_session.hpp index eab3038..af1136a 100644 --- a/src/service/conversation_session.hpp +++ b/src/service/conversation_session.hpp @@ -14,6 +14,7 @@ #include #include "iflytopcpp/core/basic/nlohmann/json.hpp" +#include "iflytopcpp/core/components/timeutils.hpp" #include "iflytopcpp/core/spdlogfactory/logger.hpp" #include "iflytopcpp/core/thread/thread.hpp" #include "iflytopcpp/core/utils/uuid/uuid.hpp" @@ -40,15 +41,33 @@ class ConversationSession : public enable_shared_from_this string asrTTSLocalURL; json nlpResult; + zsteady_tp buildtp = zsteady_clock().now(); + + static string gettimestamp() { + struct tm tm = {0}; + time_t t = time(nullptr); + if (t == -1) { + return ""; + } + struct tm* tmp = localtime_r(&t, &tm); + if (!tmp) { + return ""; + } + return fmt::format("{:0>4}{:0>2}{:0>2}{:0>2}{:0>2}{:0>2}", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, + tm.tm_min, tm.tm_sec); + } + public: - ConversationSession() { sessionId = UUID().toString(); }; + ConversationSession() { sessionId = gettimestamp(); }; string getSessionId() { return sessionId; }; string getAsrTTSLocalURL() { return asrTTSLocalURL; }; void setAsrTTSLocalURL(string asrTTSLocalURL) { this->asrTTSLocalURL = asrTTSLocalURL; }; - json &getNlpResult() { return nlpResult; }; + json& getNlpResult() { return nlpResult; }; void setNlpResult(json nlpResult) { this->nlpResult = nlpResult; }; + + zsteady_tp getBuildtp() { return buildtp; }; }; } // namespace iflytop \ No newline at end of file diff --git a/src/service/main_control_service.cpp b/src/service/main_control_service.cpp index c65b5a4..9226486 100644 --- a/src/service/main_control_service.cpp +++ b/src/service/main_control_service.cpp @@ -22,6 +22,9 @@ using namespace std; void MainControlService::initializeVoiceProcess() { m_beforeWakeupVoiceProcesser->setAmplifyDB(20); + m_beforeasrVoiceProcesser->setAmplifyDB(24); + + logger->info("MainControlService::start....."); /** * @brief 声卡录音回调 @@ -33,6 +36,7 @@ void MainControlService::initializeVoiceProcess() { logger->error("onRecordData audioClip is null"); return; } + m_audioLoggingService->loggerMICVoice(audioClip); // 录音 m_beforeWakeupVoiceProcesser->writeVoice(audioClip); // 交给唤醒词预处理逻辑 if (m_conversationSession) m_beforeasrVoiceProcesser->writeVoice(audioClip); // 交给asr预处理逻辑 @@ -71,15 +75,20 @@ void MainControlService::initializeVoiceProcess() { logger->error("onAfterProcessVoice audioClip is null"); return; } + auto session = m_conversationSession; + + if (!session) return; - if (!m_conversationSession) return; + if (zsteady_clock().elapsedTimeMs(session->getBuildtp()) < 1000) return; + + m_audioLoggingService->loggerASRVoice(audioClip); m_aiuiService->aiuiWrite((const char*)audioClip->data(), audioClip->size()); }); m_aiuiService->onMessage.connect([&](json& rxjson) { - // lock_guard lock(m_voiceprocessmutex); - json msg = rxjson; + + json msg = rxjson; m_workQueue->enQueue([this, msg]() { try { processasrResult(msg); @@ -91,6 +100,17 @@ void MainControlService::initializeVoiceProcess() { m_audioRecoderService->startRecord(); } + +void MainControlService::triggerProcessConversationSession() { + m_smartSoundboxPlayer->playConversationTTS(m_conversationSession->getAsrTTSLocalURL(), nullptr); + json nlpResult = m_conversationSession->getNlpResult(); + if (nlpResult["data"]["intent"]["shouldEndSession"]) { + logger->info("endSession"); + endSession(); + return; + } +} + void MainControlService::processasrResult_nlp(json& rxjson) { logger->info("rx nlp:{}", rxjson.dump()); m_conversationSession->setNlpResult(rxjson); @@ -107,6 +127,7 @@ void MainControlService::processasrResult_tts(json& rxjson) { if (isendFrame) { logger->info("rx tts end,url={}", ttsurl); m_conversationSession->setAsrTTSLocalURL(ttsurl); + triggerProcessConversationSession(); } } @@ -162,6 +183,8 @@ void MainControlService::constructSession() { } m_conversationSession = make_shared(); + logger->info("constructSession:============ {} ===========", m_conversationSession->getSessionId()); + m_audioLoggingService->triggerWakeup(m_conversationSession->getSessionId()); m_aiuiService->aiuiInit(); if (m_endsessionTimer->isRunning()) m_endsessionTimer->stop(); @@ -182,6 +205,7 @@ void MainControlService::endSession() { m_aiuiService->aiuiFinished(); m_aiuiService->aiuiDestroy(); } + m_audioLoggingService->endwakeup(); } void MainControlService::initialize() { diff --git a/src/service/main_control_service.hpp b/src/service/main_control_service.hpp index 77f409c..7eebb5a 100644 --- a/src/service/main_control_service.hpp +++ b/src/service/main_control_service.hpp @@ -64,8 +64,9 @@ class MainControlService : public enable_shared_from_this { shared_ptr m_aiuiService; shared_ptr m_endsessionTimer; - shared_ptr m_workQueue; // - recursive_mutex m_voiceprocessmutex; // + shared_ptr m_workQueue; // + recursive_mutex m_voiceprocessmutex; // + zsteady_tp m_systemsetuptime = zsteady_clock().now(); // typedef enum { kzwebService, @@ -92,5 +93,10 @@ class MainControlService : public enable_shared_from_this { void processasrResult(json rxjson); void processasrResult_nlp(json& rxjson); void processasrResult_tts(json& rxjson); + + /** + * @brief 处理本次对话,一般接收完全部的ASR结果后,调用此函数 + */ + void triggerProcessConversationSession(); }; } // namespace iflytop \ No newline at end of file diff --git a/src/service/voiceprocess/audio_logging_service.cpp b/src/service/voiceprocess/audio_logging_service.cpp index 909f259..b623376 100644 --- a/src/service/voiceprocess/audio_logging_service.cpp +++ b/src/service/voiceprocess/audio_logging_service.cpp @@ -48,7 +48,6 @@ void AudioLoggingService::loggerMICVoice(shared_ptr audioClip) { cleanupLogVoiceByTime("./voice/rt/mic_voice_*.wav", RT_STORAGE_CLIP_NUMS); m_rtMicVoiceFile.reset(new WavRecorder("./voice/rt/mic_voice_" + gettimestamp() + ".wav")); m_rtMicVoiceFile->writeHeader(audioClip->getRate(), audioClip->getBitsPerSample(), audioClip->getCh(), 0); - // m_rtMicVoiceFile->dumpheader(logger); m_rtMicVoiceFile->writeVoice(audioClip); } else { m_rtMicVoiceFile->writeVoice(audioClip); @@ -57,33 +56,51 @@ void AudioLoggingService::loggerMICVoice(shared_ptr audioClip) { } } - if (m_wakeupState) { - if (!m_rtBfwakeupVoiceFile) { - cleanupLogVoiceByTime("./voice/wakeup/mic_voice_*.wav", WAKEUP_STORAGE_CLIP_NUMS); - m_rtBfwakeupVoiceFile.reset(new WavRecorder("./voice/wakeup/mic_voice_" + gettimestamp() + ".wav")); - // m_rtBfwakeupVoiceFile->writeHeader(audioClip->getRate(), audioClip->getBitsPerSample(), audioClip->getCh(), 0); - m_rtBfwakeupVoiceFile->writeVoice(audioClip); - } else { - m_rtBfwakeupVoiceFile->writeVoice(audioClip); + /** + * @brief 每次唤醒都记录下对应的原始语音 + */ + { + lock_guard lock(m_mutex); + if (m_wakeupState) { + if (!m_wakeupMicVoiceFile) { + cleanupLogVoiceByTime("./voice/wakeup/mic_voice_*.wav", WAKEUP_STORAGE_CLIP_NUMS); + m_wakeupMicVoiceFile.reset(new WavRecorder("./voice/wakeup/mic_voice_" + m_wakeupsessionid + ".wav")); + m_wakeupMicVoiceFile->writeHeader(audioClip->getRate(), audioClip->getBitsPerSample(), audioClip->getCh(), 0); + m_wakeupMicVoiceFile->writeVoice(audioClip); + } else { + m_wakeupMicVoiceFile->writeVoice(audioClip); + } } } } -void AudioLoggingService::setWakeupState(bool state) { - if (!state) { - m_wakeupState = true; - } else { - m_wakeupState = false; - m_rtBfwakeupVoiceFile.reset(); +void AudioLoggingService::triggerWakeup(string sessionid) { + lock_guard lock(m_mutex); + m_wakeupsessionid = sessionid; + m_wakeupState = true; +} +void AudioLoggingService::endwakeup() { + lock_guard lock(m_mutex); + m_wakeupState = false; + + if (m_wakeupMicVoiceFile) { + m_wakeupMicVoiceFile.reset(); + } + if (m_asrFile) { + m_asrFile.reset(); } } +/** + * @brief 记录送给唤醒词的语音 + * + * @param audioClip + */ void AudioLoggingService::loggerBeforeWakeupVoice(shared_ptr audioClip) { /** - * @brief MIC的语音会存储在两个地方 + * @brief * 1. ./voice/rt/beforewakeup*.wav */ - if (!m_rtBfwakeupVoiceFile) { cleanupLogVoiceByTime("./voice/rt/rtbfwakeup*.wav", RT_STORAGE_CLIP_NUMS); m_rtBfwakeupVoiceFile.reset(new WavRecorder("./voice/rt/rtbfwakeup" + gettimestamp() + ".wav")); @@ -96,4 +113,25 @@ void AudioLoggingService::loggerBeforeWakeupVoice(shared_ptr audioCli } } } -void AudioLoggingService::loggerASRVoice(shared_ptr audioClip) {} \ No newline at end of file + +/** + * @brief 记录送给ASR的语音 + * + * @param audioClip + */ +void AudioLoggingService::loggerASRVoice(shared_ptr audioClip) { + /** + * @brief + * ./voice/wakeup/asr*.wav + */ + lock_guard lock(m_mutex); + if (!m_wakeupState) return; + if (!m_asrFile) { + cleanupLogVoiceByTime("./voice/wakeup/asr*.wav", WAKEUP_STORAGE_CLIP_NUMS); + m_asrFile.reset(new WavRecorder("./voice/wakeup/asr" + m_wakeupsessionid + ".wav")); + m_asrFile->writeHeader(audioClip->getRate(), audioClip->getBitsPerSample(), audioClip->getCh(), 0); + m_asrFile->writeVoice(audioClip); + } else { + m_asrFile->writeVoice(audioClip); + } +} \ No newline at end of file diff --git a/src/service/voiceprocess/audio_logging_service.hpp b/src/service/voiceprocess/audio_logging_service.hpp index b3120a1..da6260a 100644 --- a/src/service/voiceprocess/audio_logging_service.hpp +++ b/src/service/voiceprocess/audio_logging_service.hpp @@ -85,9 +85,17 @@ class AudioLoggingService : public enable_shared_from_this } }; - bool m_wakeupState = false; - unique_ptr m_rtMicVoiceFile; - unique_ptr m_rtBfwakeupVoiceFile; + bool m_wakeupState = false; + + unique_ptr m_rtMicVoiceFile; // 实时记录语音-MIC语音 + unique_ptr m_rtBfwakeupVoiceFile; // 实时记录语音-送给唤醒词的语音 + + unique_ptr m_wakeupMicVoiceFile; // 唤醒时记录的语音-MIC原始语音 + unique_ptr m_asrFile; // 唤醒时记录的语音-ASR识别语音 + + string m_wakeupsessionid; + + mutex m_mutex; public: AudioLoggingService(){}; @@ -99,7 +107,9 @@ class AudioLoggingService : public enable_shared_from_this void loggerASRVoice(shared_ptr audioClip); void setWakeupState(bool state); - void clearupWavRecorder(); + + void triggerWakeup(string sessionid); + void endwakeup(); private: void cleanupLogVoiceByTime(string prefix, size_t maxnum); diff --git a/src/service/voiceprocess/beforeasr_voiceprocesser.cpp b/src/service/voiceprocess/beforeasr_voiceprocesser.cpp index 6a5e182..84c6bdf 100644 --- a/src/service/voiceprocess/beforeasr_voiceprocesser.cpp +++ b/src/service/voiceprocess/beforeasr_voiceprocesser.cpp @@ -27,12 +27,25 @@ void BfAsrVProcesser::processVoice(shared_ptr audioClip) { vector voice; audioClip->getOneCHVoice(voice, 0); + ZCHECK(audioClip->getFormat() == S16_LE, "audioClip format is not S16_LE"); + + int16_t *p = (int16_t *)voice.data(); + for (int i = 0; i < voice.size() / 2; i++) { + p[i] = p[i] * m_amplify; + } + shared_ptr afterProcessAudioClip = make_shared((uint8_t *)voice.data(), voice.size(), 1, audioClip->getRate(), audioClip->getFormat()); onAfterProcessVoice(afterProcessAudioClip); } +void BfAsrVProcesser::setAmplify(float amplify) { + logger->info("BfAsrVProcesser::setAmplify amplify={}", amplify); + m_amplify = amplify; +} +void BfAsrVProcesser::setAmplifyDB(float amplifydb) { m_amplify = pow(10, amplifydb / 20); } + void BfAsrVProcesser::writeVoice(shared_ptr audioClip) { ZCHECK(audioClip != nullptr, "audioClip is null"); ZCHECK(audioClip->getFormat() == S16_LE, "audioClip format is not S16_LE"); diff --git a/src/service/voiceprocess/beforeasr_voiceprocesser.hpp b/src/service/voiceprocess/beforeasr_voiceprocesser.hpp index 6f0c9f5..4e24392 100644 --- a/src/service/voiceprocess/beforeasr_voiceprocesser.hpp +++ b/src/service/voiceprocess/beforeasr_voiceprocesser.hpp @@ -23,8 +23,9 @@ * * service:BfAsrVProcesser * - * 作用: 波束增强,降噪,回声消除 - * + * 作用: + * bosuzengqiang + * 简单的波束增强 */ namespace iflytop { @@ -39,6 +40,8 @@ class BfAsrVProcesser : public enable_shared_from_this { unique_ptr m_thread; + float m_amplify = 1.0; + public: nod::signal audioClip)> onAfterProcessVoice; @@ -46,6 +49,9 @@ class BfAsrVProcesser : public enable_shared_from_this { void initialize(); + void setAmplify(float amplify); + void setAmplifyDB(float amplifydb); + void writeVoice(shared_ptr audioClip); private: diff --git a/src/service/voiceprocess/wakeup_processer.cpp b/src/service/voiceprocess/wakeup_processer.cpp index 9ecf1ad..06a429a 100644 --- a/src/service/voiceprocess/wakeup_processer.cpp +++ b/src/service/voiceprocess/wakeup_processer.cpp @@ -3,14 +3,23 @@ using namespace std; using namespace iflytop; using namespace core; -#define CHUNK_SIZE "1600" +#define WAKEUP_MODULE_INIT_TIMEOUT_S 8 void WakeupProcesser::initialize(string precise_engine, string wakeupmodulepath, string chunksize) { logger->info("initialize precise_engine:{}, wakeupmodulepath:{}, chunksize:{}", precise_engine, wakeupmodulepath, chunksize); wakeupProcesser.reset(new MycroftPreciseWapper()); wakeupProcesser->initialize(precise_engine.c_str(), wakeupmodulepath.c_str(), chunksize.c_str()); - m_chucksize = atoi(chunksize.c_str()); + m_chucksize = atoi(chunksize.c_str()); + uint16_t s_zerovoicebuf[m_chucksize] = {0}; + wakeupProcesser->processVoice((uint8_t*)&s_zerovoicebuf[0], m_chucksize * 2); + wakeupProcesser->processVoice((uint8_t*)&s_zerovoicebuf[0], m_chucksize * 2); + for (size_t i = 0; i < WAKEUP_MODULE_INIT_TIMEOUT_S; i++) { + logger->info("wait for wakeup module init {}/{}", i, WAKEUP_MODULE_INIT_TIMEOUT_S); + sleep(1); + } + + } int WakeupProcesser::getChunkSize() { return m_chucksize; } diff --git a/src/service/voiceprocess/wakeup_processer.hpp b/src/service/voiceprocess/wakeup_processer.hpp index 1b2a402..405d516 100644 --- a/src/service/voiceprocess/wakeup_processer.hpp +++ b/src/service/voiceprocess/wakeup_processer.hpp @@ -40,7 +40,7 @@ class WakeupProcesser : public enable_shared_from_this { bool wakeupflag = false; tp_steady last_wakeup_timepoint; function m_cbfunc; - int m_chucksize = 0; + size_t m_chucksize = 0; public: nod::signal onWakeupSignal;