• 首页 首页 icon
  • 工具库 工具库 icon
    • IP查询 IP查询 icon
  • 内容库 内容库 icon
    • 快讯库 快讯库 icon
    • 精品库 精品库 icon
    • 问答库 问答库 icon
  • 更多 更多 icon
    • 服务条款 服务条款 icon

FreeSwitch插件实现静音检测VAD和语音识别ASR

武飞扬头像
shanghaimoon
帮助1

个人长期从事通信语音方面的产品开发工作,且近期打算开发一款智能机器人语音通信产品,由于产品的主体是基于目前广泛使用的FreeSwitch软交换开源系统,因此基于FreeSwitch的插件技术开发一些智能语音机器人的基础模块是比较理想的选择。

本文主要就语音机器人的两个核心功能静音检测(VAD)和语音识别(ASR)来完成这一款插件。插件主要是通过FreeSwitch的media bug技术实时获取语音流,然后通过抽取opus的VAD检测算法进行静音检测,最后对接了科大讯飞的实时语音转写接口实现了ASR识别,结果通过ESL事件方式上报给应用层使用。整体效果测试下来比较理想,响应速度快,识别准确率高。有需要的朋友可以参考实现。核心的代码展示如下:

  1.  
     
  2.  
    #define DR_WAV_IMPLEMENTATION
  3.  
     
  4.  
    #include <switch.h>
  5.  
    #include "dr_wav.h"
  6.  
    #include "opusvad.h"
  7.  
    #include "queue.h"
  8.  
    #include "xfasr.h"
  9.  
     
  10.  
    #define VAD_EVENT_START "vad::start"
  11.  
    #define VAD_EVENT_STOP "vad::stop"
  12.  
    #define VAD_EVENT_ASR "vad::asr"
  13.  
     
  14.  
    static switch_bool_t robot_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type);
  15.  
     
  16.  
    #define MAX_VOICE_LEN 240000
  17.  
    #define MAX_VOICE_LEN_BASE64 645000
  18.  
    #define MAXFILES 8
  19.  
    #define TTS_MAX_SIZE 900
  20.  
    #define MAX_HZ_SIZE 240
  21.  
    #define VAD_VOICE_FRAMES 5
  22.  
    #define VAD_SILINCE_FRAMES 50
  23.  
    #define VAD_HIS_LEN 100
  24.  
    #define VAD_ADD_FRAME_SIZE 5
  25.  
     
  26.  
    /*
     * Module-wide settings. mod_vadasr_load fills both members from the
     * "appid" and "appkey" params of asr.conf; robot_callback hands them to
     * init_asr() when speech is detected.
     */
    static struct {
        char *appid;
        char *appkey;
    } globals;
  30.  
     
  31.  
     
  32.  
    typedef struct robot_session_info {
  33.  
    int index;
  34.  
    int filetime;
  35.  
    int fileplaytime;
  36.  
    int nostoptime;
  37.  
    int asrtimeout;
  38.  
    int asr;
  39.  
    int play, pos;
  40.  
    int sos, eos, ec, count;
  41.  
    int eos_silence_threshold;
  42.  
    int final_timeout_ms;
  43.  
    int silence_threshold;
  44.  
    int harmonic;
  45.  
    int monitor;
  46.  
    int lanid;
  47.  
    int vadvoicems;
  48.  
    int vadsilencems;
  49.  
    int nslevel;
  50.  
    switch_core_session_t *session;
  51.  
    char taskid[32];
  52.  
    char groupid[32];
  53.  
    char telno[32];
  54.  
    char userid[64];
  55.  
    char callid[64];
  56.  
    char orgi[64];
  57.  
    char extid[64];
  58.  
    char uuid[64];
  59.  
    char uuidbak[64];
  60.  
    char recordfilename[128];
  61.  
    char para1[256];
  62.  
    char para2[256];
  63.  
    char para3[256];
  64.  
    char filename[TTS_MAX_SIZE];
  65.  
    char vadfilename[TTS_MAX_SIZE];
  66.  
    short buffer[MAX_VOICE_LEN];
  67.  
    drwav *fwav;
  68.  
    drwav *fvadwav;
  69.  
    int state; // 0:silence 1:voice
  70.  
    queue *vadqueue;
  71.  
    int16_t *vadbuffer;
  72.  
    int16_t framecount;
  73.  
    switch_audio_resampler_t *resampler;
  74.  
    asr_session_t *asrsession;
  75.  
     
  76.  
    } robot_session_info_t;
  77.  
     
  78.  
     
  79.  
    SWITCH_BEGIN_EXTERN_C
  80.  
     
  81.  
    SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vadasr_shutdown);
  82.  
    SWITCH_MODULE_LOAD_FUNCTION(mod_vadasr_load);
  83.  
    SWITCH_MODULE_DEFINITION(mod_vadasr, mod_vadasr_load, mod_vadasr_shutdown, NULL);
  84.  
    SWITCH_STANDARD_APP(robotasr_start_function);
  85.  
     
  86.  
    /*
     * Module load entry point.
     *
     * Reserves the three custom event subclasses, creates the module
     * interface, reads "appid"/"appkey" from asr.conf into `globals`, and
     * registers the "vad" dialplan application.
     *
     * Returns SWITCH_STATUS_TERM when a subclass cannot be reserved,
     * SWITCH_STATUS_SUCCESS otherwise (a missing asr.conf is only logged).
     */
    SWITCH_MODULE_LOAD_FUNCTION(mod_vadasr_load)
    {
        static const char *const subclasses[] = { VAD_EVENT_START, VAD_EVENT_STOP, VAD_EVENT_ASR };
        const size_t subclass_count = sizeof(subclasses) / sizeof(subclasses[0]);
        switch_application_interface_t *app_interface;
        const char *cf = "asr.conf";
        switch_xml_t cfg, xml, settings, param;
        size_t i;

        memset(&globals, 0, sizeof(globals));

        for (i = 0; i < subclass_count; i++) {
            if (switch_event_reserve_subclass(subclasses[i]) != SWITCH_STATUS_SUCCESS) {
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Robot Couldn't register subclass %s!\n",
                                  subclasses[i]);
                /* Release whatever was reserved before the failure. */
                while (i > 0) {
                    switch_event_free_subclass(subclasses[--i]);
                }
                return SWITCH_STATUS_TERM;
            }
        }

        /* connect my internal structure to the blank pointer passed to me */
        *module_interface = switch_loadable_module_create_module_interface(pool, modname);

        if (!(xml = switch_xml_open_cfg(cf, &cfg, NULL))) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open of %s failed\n", cf);
        } else {
            if ((settings = switch_xml_child(cfg, "settings"))) {
                for (param = switch_xml_child(settings, "param"); param; param = param->next) {
                    const char *var = switch_xml_attr_soft(param, "name");
                    const char *val = switch_xml_attr_soft(param, "value");

                    /* The attribute strings belong to the XML tree freed
                     * below, so keep pool-owned copies instead of pointers
                     * into it. */
                    if (!strcmp(var, "appid")) {
                        globals.appid = switch_core_strdup(pool, val);
                    } else if (!strcmp(var, "appkey")) {
                        globals.appkey = switch_core_strdup(pool, val);
                    }
                }
            }

            switch_xml_free(xml);
        }

        /* NOTE(review): this writes the appkey credential to the log;
         * consider masking it. */
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Robot enabled,appid=%s,appkey=%s\n",
                          switch_str_nil(globals.appid), switch_str_nil(globals.appkey));

        /* Register the application for this module; it is invoked as "vad". */
        SWITCH_ADD_APP(app_interface, "vad", "vad", "ai robot", robotasr_start_function, "[<ACTION ><VAD_VOICE_FRAMES> <VAD_SILINCE_FRAMES> <NS_LEVEL>]", SAF_NONE);

        /* indicate that the module should continue to be loaded */
        return SWITCH_STATUS_SUCCESS;
    }
  146.  
     
  147.  
    // Called when the system shuts down
  148.  
    /*
     * Module shutdown entry point: releases the three custom event
     * subclasses reserved at load time, logs, and reports success.
     */
    SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vadasr_shutdown)
    {
        const char *const reserved[] = { VAD_EVENT_START, VAD_EVENT_STOP, VAD_EVENT_ASR };
        size_t idx;

        for (idx = 0; idx < sizeof(reserved) / sizeof(reserved[0]); ++idx) {
            switch_event_free_subclass(reserved[idx]);
        }

        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "myapplication disabled\n");
        return SWITCH_STATUS_SUCCESS;
    }
  156.  
     
  157.  
    SWITCH_STANDARD_APP(robotasr_start_function)
  158.  
    {
  159.  
    switch_media_bug_t *bug;
  160.  
    switch_status_t status;
  161.  
    switch_channel_t *channel;
  162.  
    robot_session_info_t *robot_info;
  163.  
     
  164.  
    // switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "robot_start_function start\n");
  165.  
    if (session == NULL) {
  166.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR,
  167.  
    "FreeSWITCH is NULL! Please report to developers\n");
  168.  
    return;
  169.  
    }
  170.  
    channel = switch_core_session_get_channel(session);
  171.  
    if (channel == NULL) {
  172.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR,
  173.  
    "No channel for FreeSWITCH session! Please report this "
  174.  
    "to the developers.\n");
  175.  
    return;
  176.  
    }
  177.  
     
  178.  
    /* Is this channel already set? */
  179.  
    bug = (switch_media_bug_t *)switch_channel_get_private(channel, "_robot_");
  180.  
     
  181.  
    /* If yes */
  182.  
     
  183.  
    if (bug != NULL) {
  184.  
     
  185.  
    /* If we have a stop remove audio bug */
  186.  
    if (strcasecmp(data, "stop") == 0) {
  187.  
    // robot_info = (robot_session_info_t *)switch_channel_get_private(channel, "_robotinfo_");
  188.  
    switch_channel_set_private(channel, "_robot_", NULL);
  189.  
    // process_close(robot_info);
  190.  
    switch_core_media_bug_remove(session, &bug);
  191.  
    return;
  192.  
    }
  193.  
    /* We have already started */
  194.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_WARNING,
  195.  
    "Robot Cannot run 2 at once on the same channel!\n");
  196.  
    return;
  197.  
    }
  198.  
     
  199.  
    const char *action = NULL, *vadvoicems = NULL, *vadsilencems = NULL, *nslevel = NULL;
  200.  
    char *argv[4] = { 0 };
  201.  
    char *mycmd = NULL;
  202.  
     
  203.  
    if (!zstr(data)) {
  204.  
    mycmd = switch_core_session_strdup(session, data);
  205.  
    switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
  206.  
    }
  207.  
     
  208.  
    if (argv[0]) action = argv[0];
  209.  
    if (argv[1]) vadvoicems = argv[1];
  210.  
    if (argv[2]) vadsilencems = argv[2];
  211.  
    if (argv[3]) nslevel = argv[3];
  212.  
     
  213.  
    if (!action || !vadvoicems || !vadsilencems || !nslevel) {
  214.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "-ERR Missing Arguments\n");
  215.  
    return;
  216.  
    }
  217.  
     
  218.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO,
  219.  
    "action %s vadvoicems %s vadsilencems %s nslevel %s\n", action, vadvoicems, vadsilencems,
  220.  
    nslevel);
  221.  
     
  222.  
    // 初始化变量, 一定记得要 free
  223.  
    robot_info = (robot_session_info_t *)malloc(sizeof(robot_session_info_t));
  224.  
    if (robot_info == NULL) return;
  225.  
    robot_info->session = session;
  226.  
    strcpy(robot_info->uuid, switch_core_session_get_uuid(robot_info->session));
  227.  
    robot_info->vadvoicems = atoi(vadvoicems);
  228.  
    robot_info->vadsilencems = atoi(vadsilencems);
  229.  
    robot_info->nslevel = atoi(nslevel);
  230.  
     
  231.  
    status = switch_core_media_bug_add(session, "vmd", NULL, robot_callback, robot_info, 0, SMBF_READ_REPLACE, &bug);
  232.  
     
  233.  
    if (status != SWITCH_STATUS_SUCCESS) {
  234.  
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Robot Failure hooking to stream\n");
  235.  
    return;
  236.  
    }
  237.  
    switch_channel_set_private(channel, "_robot_", bug);
  238.  
    }
  239.  
     
  240.  
    SWITCH_END_EXTERN_C
  241.  
     
  242.  
     
  243.  
    /*
     * Releases everything owned by a per-call state object and the object
     * itself: both wav writers, the VAD flag queue, the channel private
     * "_robot_", and the ASR session.
     *
     * `rh` must not be used after this returns. Returns SWITCH_FALSE when
     * `rh` is NULL, SWITCH_TRUE otherwise.
     */
    static switch_bool_t process_close(robot_session_info_t *rh)
    {
        switch_channel_t *channel;

        if (rh == NULL) {
            return SWITCH_FALSE;
        }

        rh->uuid[0] = 0;
        rh->index = -1;

        /* The writers come from drwav_open_file_write(), which heap-allocates
         * the drwav object; drwav_close() uninitialises AND frees it, whereas
         * drwav_uninit() alone would leak the object. */
        if (NULL != rh->fwav) { drwav_close(rh->fwav); }
        if (NULL != rh->fvadwav) { drwav_close(rh->fvadwav); }

        destroy_queue(rh->vadqueue);

        channel = switch_core_session_get_channel(rh->session);
        switch_channel_set_private(channel, "_robot_", NULL);

        delete rh->asrsession;
        free(rh);
        return SWITCH_TRUE;
    }
  258.  
     
  259.  
     
  260.  
     
  261.  
    void handle_event(const std::string & message, void *arg)
  262.  
    {
  263.  
    switch_event_t *event;
  264.  
    switch_status_t status;
  265.  
    switch_event_t *event_copy;
  266.  
    switch_channel_t *channel;
  267.  
     
  268.  
    robot_session_info_t *robot_info = (robot_session_info_t *)arg;
  269.  
    channel = switch_core_session_get_channel(robot_info->session);
  270.  
     
  271.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "eventAsrText:%s\n", message.c_str());
  272.  
     
  273.  
    status = switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, VAD_EVENT_ASR);
  274.  
    if (status != SWITCH_STATUS_SUCCESS) { return; }
  275.  
     
  276.  
    switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Asr-Text", message.c_str());
  277.  
    switch_channel_event_set_data(channel, event);
  278.  
    switch_event_fire(&event);
  279.  
    }
  280.  
     
  281.  
    void handle_message(const std::string & message, void *arg)
  282.  
    {
  283.  
    char middleText[500] = { 0 };
  284.  
    //printf(">>> %s\n", message.c_str());
  285.  
    cJSON* cjson_test = NULL;
  286.  
    cJSON* cjson_action = NULL;
  287.  
    cJSON* cjson_code = NULL;
  288.  
    cJSON* cjson_data = NULL;
  289.  
    cJSON* cjson_desc = NULL;
  290.  
    cJSON* cjson_sid = NULL;
  291.  
    cJSON* cjson_text = NULL;
  292.  
    cJSON* cjson_segid = NULL;
  293.  
    cJSON* cjson_cn = NULL;
  294.  
    cJSON* cjson_st = NULL;
  295.  
    cJSON* cjson_rt = NULL;
  296.  
    cJSON* cjson_rt_item = NULL;
  297.  
    cJSON* cjson_cw_item = NULL;
  298.  
    cJSON* cjson_w_item = NULL;
  299.  
    cJSON* cjson_type = NULL;
  300.  
    cJSON* cjson_ws = NULL;
  301.  
    cJSON* cjson_cw = NULL;
  302.  
    cJSON* cjson_w = NULL;
  303.  
     
  304.  
    asr_session_t *asr = (asr_session_t *)arg;
  305.  
     
  306.  
    cjson_test = cJSON_Parse(message.c_str());
  307.  
    cjson_action = cJSON_GetObjectItem(cjson_test, "action");
  308.  
    cjson_code = cJSON_GetObjectItem(cjson_test, "code");
  309.  
    cjson_data = cJSON_GetObjectItem(cjson_test, "data");
  310.  
    cjson_desc = cJSON_GetObjectItem(cjson_test, "desc");
  311.  
    cjson_sid = cJSON_GetObjectItem(cjson_test, "sid");
  312.  
     
  313.  
    if (strcmp(cjson_action->valuestring, "result") == 0 && strcmp(cjson_code->valuestring, "0") == 0 && strlen(cjson_data->valuestring) > 0)
  314.  
    {
  315.  
    cjson_text = cJSON_Parse(cjson_data->valuestring);
  316.  
    cjson_segid = cJSON_GetObjectItem(cjson_text, "seg_id");
  317.  
    cjson_cn = cJSON_GetObjectItem(cjson_text, "cn");
  318.  
    cjson_st = cJSON_GetObjectItem(cjson_cn, "st");
  319.  
    cjson_rt = cJSON_GetObjectItem(cjson_st, "rt");
  320.  
    cjson_type = cJSON_GetObjectItem(cjson_st, "type");
  321.  
     
  322.  
    if (strcmp(cjson_type->valuestring, "0") == 0)
  323.  
    {
  324.  
    int rt_array_size = cJSON_GetArraySize(cjson_rt);
  325.  
    //printf("rt_array_size:%d", rt_array_size);
  326.  
    for (int i = 0; i < rt_array_size; i )
  327.  
    {
  328.  
    cjson_rt_item = cJSON_GetArrayItem(cjson_rt, i);
  329.  
    cjson_ws = cJSON_GetObjectItem(cjson_rt_item, "ws");
  330.  
     
  331.  
    int ws_array_size = cJSON_GetArraySize(cjson_ws);
  332.  
    for (int j = 0; j < ws_array_size; j )
  333.  
    {
  334.  
    cjson_cw_item = cJSON_GetArrayItem(cjson_ws, j);
  335.  
    cjson_cw = cJSON_GetObjectItem(cjson_cw_item, "cw");
  336.  
     
  337.  
    int cw_array_size = cJSON_GetArraySize(cjson_cw);
  338.  
    for (int k = 0; k < cw_array_size; k )
  339.  
    {
  340.  
    cjson_w_item = cJSON_GetArrayItem(cjson_cw, k);
  341.  
    cjson_w = cJSON_GetObjectItem(cjson_w_item, "w");
  342.  
    //printf("w:%s", cjson_w->valuestring);
  343.  
    if (strlen(asr->asr_text) <= BFLEN - 20)
  344.  
    {
  345.  
    strcat(asr->asr_text, cjson_w->valuestring);
  346.  
    }
  347.  
    else
  348.  
    {
  349.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "content too long!!!!!!\n");
  350.  
    }
  351.  
     
  352.  
    }
  353.  
     
  354.  
    }
  355.  
     
  356.  
    }
  357.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "asrFinalResult:%s\n", asr->asr_text);
  358.  
     
  359.  
    }
  360.  
    else
  361.  
    {
  362.  
    int rt_array_size = cJSON_GetArraySize(cjson_rt);
  363.  
    //printf("rt_array_size:%d", rt_array_size);
  364.  
    for (int i = 0; i < rt_array_size; i )
  365.  
    {
  366.  
    cjson_rt_item = cJSON_GetArrayItem(cjson_rt, i);
  367.  
    cjson_ws = cJSON_GetObjectItem(cjson_rt_item, "ws");
  368.  
     
  369.  
    int ws_array_size = cJSON_GetArraySize(cjson_ws);
  370.  
    for (int j = 0; j < ws_array_size; j )
  371.  
    {
  372.  
    cjson_cw_item = cJSON_GetArrayItem(cjson_ws, j);
  373.  
    cjson_cw = cJSON_GetObjectItem(cjson_cw_item, "cw");
  374.  
     
  375.  
    int cw_array_size = cJSON_GetArraySize(cjson_cw);
  376.  
    for (int k = 0; k < cw_array_size; k )
  377.  
    {
  378.  
    cjson_w_item = cJSON_GetArrayItem(cjson_cw, k);
  379.  
    cjson_w = cJSON_GetObjectItem(cjson_w_item, "w");
  380.  
    strcat(middleText, cjson_w->valuestring);
  381.  
     
  382.  
    }
  383.  
    }
  384.  
    }
  385.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "asrTempResult:%s\n", middleText);
  386.  
    }
  387.  
    }
  388.  
    else if (strcmp(cjson_action->valuestring, "error") == 0 )
  389.  
    {
  390.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "asrErrorInfo:%s\n", cjson_desc->valuestring);
  391.  
     
  392.  
    }
  393.  
     
  394.  
    }
  395.  
     
  396.  
    static switch_bool_t robot_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
  397.  
    {
  398.  
    robot_session_info_t *robot_info;
  399.  
    // switch_codec_t *read_codec;
  400.  
    switch_frame_t *frame;
  401.  
    int flag;
  402.  
    drwav_data_format format;// = { 0 };
  403.  
    int16_t len;
  404.  
    int voiceflagcount;
  405.  
    int silenceflagcount;
  406.  
    int nslevel;
  407.  
    switch_event_t *event;
  408.  
    switch_status_t status;
  409.  
    switch_event_t *event_copy;
  410.  
    char *recorddir = NULL;
  411.  
    switch_codec_implementation_t read_impl;
  412.  
    switch_channel_t *channel;
  413.  
     
  414.  
     
  415.  
     
  416.  
    robot_info = (robot_session_info_t *)user_data;
  417.  
    if (robot_info == NULL) { return SWITCH_FALSE; }
  418.  
     
  419.  
    channel = switch_core_session_get_channel(robot_info->session);
  420.  
     
  421.  
    voiceflagcount = robot_info->vadvoicems / 20;
  422.  
    silenceflagcount = robot_info->vadsilencems / 20;
  423.  
    nslevel = robot_info->nslevel;
  424.  
     
  425.  
    format.container = drwav_container_riff;
  426.  
    format.format = DR_WAVE_FORMAT_PCM;
  427.  
    format.channels = 1;
  428.  
    format.sampleRate = (drwav_uint32)8000;
  429.  
    format.bitsPerSample = 16;
  430.  
     
  431.  
    recorddir = switch_core_get_variable_dup("record_prefix");
  432.  
     
  433.  
    switch (type) {
  434.  
     
  435.  
    case SWITCH_ABC_TYPE_INIT:
  436.  
    sprintf(robot_info->filename, "%s%s.wav", recorddir, robot_info->uuid);
  437.  
    robot_info->fwav = drwav_open_file_write(robot_info->filename, &format);
  438.  
    if (!robot_info->fwav) {
  439.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "full record openfile error %s\n",
  440.  
    robot_info->filename);
  441.  
    }
  442.  
     
  443.  
    SetConsoleOutputCP(CP_UTF8); //解决windows控制台输出中文乱码
  444.  
     
  445.  
    robot_info->vadqueue = create_queue();
  446.  
    robot_info->state = 0;
  447.  
    robot_info->framecount = 0;
  448.  
    robot_info->fvadwav = NULL;
  449.  
     
  450.  
    //初始话语音识别
  451.  
    robot_info->asrsession = new asr_session_t();
  452.  
    robot_info->asrsession->handle_message = handle_message;
  453.  
    robot_info->asrsession->handle_event = handle_event;
  454.  
    robot_info->asrsession->event_arg = robot_info;
  455.  
     
  456.  
    switch_core_session_get_read_impl(robot_info->session, &read_impl);
  457.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Read imp %u %u.\n", read_impl.samples_per_second, read_impl.number_of_channels);
  458.  
    status = switch_resample_create(&robot_info->resampler, read_impl.actual_samples_per_second, 16000, 640, SWITCH_RESAMPLE_QUALITY, 1);
  459.  
    if (status != SWITCH_STATUS_SUCCESS) {
  460.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to allocate resampler\n");
  461.  
    }
  462.  
     
  463.  
    break;
  464.  
     
  465.  
    case SWITCH_ABC_TYPE_READ_REPLACE:
  466.  
     
  467.  
    if (robot_info->uuid[0] == 0) break;
  468.  
     
  469.  
    //获取语音数据
  470.  
    frame = switch_core_media_bug_get_read_replace_frame(bug);
  471.  
     
  472.  
    //静音检测
  473.  
    flag = silk_VAD_Get((const short*)frame->data);
  474.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "vad result %d\n", flag);
  475.  
     
  476.  
    //静音标志缓冲
  477.  
    len = get_queue_length(robot_info->vadqueue);
  478.  
    if (len == VAD_HIS_LEN) { delete_queue(robot_info->vadqueue); }
  479.  
    insert_queue(robot_info->vadqueue, flag, NULL, 0);
  480.  
     
  481.  
     
  482.  
    //语音检测
  483.  
    if (getvadflagcount(robot_info->vadqueue, voiceflagcount, 1) && robot_info->state == 0) {
  484.  
     
  485.  
    robot_info->state = 1;
  486.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, " Speech Detected!!! \n");
  487.  
     
  488.  
    //开启语音识别
  489.  
    init_asr((char*)globals.appid, (char*)globals.appkey, robot_info->asrsession);
  490.  
     
  491.  
    sprintf(robot_info->vadfilename, "%s%s_%d.wav", recorddir, robot_info->uuid, robot_info->framecount);
  492.  
    robot_info->fvadwav = drwav_open_file_write(robot_info->vadfilename, &format);
  493.  
    if (!robot_info->fvadwav) {
  494.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "vad open file error %s\n",
  495.  
    robot_info->vadfilename);
  496.  
    strcpy(robot_info->vadfilename, "");
  497.  
    //break;
  498.  
    }
  499.  
     
  500.  
     
  501.  
    status = switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, VAD_EVENT_START);
  502.  
    if (status != SWITCH_STATUS_SUCCESS) { break; }
  503.  
    switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Vad-Status", "start");
  504.  
    switch_channel_event_set_data(channel, event);
  505.  
    /*if ((switch_event_dup(&event_copy, event)) != SWITCH_STATUS_SUCCESS) { break; }
  506.  
    switch_core_session_queue_event(robot_info->session, &event);
  507.  
    switch_event_fire(&event_copy);*/
  508.  
    switch_event_fire(&event);
  509.  
    }
  510.  
     
  511.  
    //静音检测
  512.  
    if (getvadflagcount(robot_info->vadqueue, silenceflagcount, 0) && robot_info->state == 1) {
  513.  
    robot_info->state = 0;
  514.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE,
  515.  
    "-----Silence Detected,Stop Recording!!! FileName:%s.-----\n", robot_info->vadfilename);
  516.  
    if (robot_info->fvadwav) { drwav_uninit(robot_info->fvadwav); }
  517.  
    robot_info->fvadwav = NULL;
  518.  
     
  519.  
    status = switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, VAD_EVENT_STOP);
  520.  
    if (status != SWITCH_STATUS_SUCCESS) { break; }
  521.  
    switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Vad-Status", "stop");
  522.  
    switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Vad-RecordFile", robot_info->vadfilename);
  523.  
    switch_channel_event_set_data(channel, event);
  524.  
    switch_event_fire(&event);
  525.  
     
  526.  
    //发送Asr结束标记
  527.  
    send_end(robot_info->asrsession);
  528.  
    }
  529.  
     
  530.  
    //录音-vad部分
  531.  
    if (robot_info->fvadwav) { drwav_write_pcm_frames(robot_info->fvadwav, frame->samples, frame->data); }
  532.  
    //完整部分
  533.  
    if (robot_info->fwav){ drwav_write_pcm_frames(robot_info->fwav, frame->samples, frame->data); }
  534.  
    robot_info->framecount ;
  535.  
     
  536.  
    //检测到语音时发送语音数据包
  537.  
    if(robot_info->state == 1)
  538.  
    {
  539.  
    //上采样至16K
  540.  
    switch_resample_process(robot_info->resampler, (int16_t *)frame->data, frame->datalen);
  541.  
    send_data(robot_info->asrsession, (char*)robot_info->resampler->to, robot_info->resampler->to_len);
  542.  
    }
  543.  
    break;
  544.  
     
  545.  
    case SWITCH_ABC_TYPE_CLOSE:
  546.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "SWITCH_ABC_TYPE_CLOSE\n");
  547.  
    send_end(robot_info->asrsession);
  548.  
    thrd_join(robot_info->asrsession->thr, NULL);
  549.  
    thrd_detach(robot_info->asrsession->thr);
  550.  
    mtx_destroy(&robot_info->asrsession->mutex);
  551.  
     
  552.  
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "the asr thread closed!!!\n");
  553.  
     
  554.  
    if (robot_info->resampler)
  555.  
    {
  556.  
    switch_resample_destroy(&robot_info->resampler);
  557.  
    }
  558.  
    process_close(robot_info);
  559.  
    break;
  560.  
    default:
  561.  
    break;
  562.  
    }
  563.  
     
  564.  
    switch_safe_free(recorddir);
  565.  
    return SWITCH_TRUE;
  566.  
    }
  567.  
     
学新通

代码工程是在Windows下编译通过的,FreeSWITCH使用的是1.6.20版本,代码基本是标准C和标准C++混合编码的,在Linux下编译不会有太大的改动,大家可以自行处理。

项目已开源到github,地址为:https://github.com/shanghaimoon888/mod_vadasr,如有问题,欢迎添加QQ号:1869731沟通交流。

这篇好文章是转载于:学新通技术网

  • 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
  • 本站站名: 学新通技术网
  • 本文地址: /boutique/detail/tanhfikaaj
系列文章
更多 icon
同类精品
更多 icon
继续加载