目前大部分的智能呼叫中心系统都对接了ASR语音识别引擎,主流的识别引擎有科大讯飞、阿里、百度等大厂的产品,价格不菲。有没有什么免费的引擎可以使用呢?答案是肯定的。

免费又好用的语音识别引擎----Vosk

vosk是一个离线开源语音识别工具,它可以识别16种语言,包括中文,而且总体效果还是不错的,因为我们要对接到呼叫中心,因此我们需要实时的流式传输语音数据,目前主流的解决方案是采用websocket协议传输语音,这块的话Vosk直接提供了websocket的server程序。而且程序已经打包成docker发布,所以启动起来相当简单,真的很贴心,一句命令搞定:

docker run -d -p 2700:2700 alphacep/kaldi-cn:latest

如何与freeswitch进行对接

因为我以前做过freeswitch与科大讯飞以及阿里的识别对接,所以非常自然地就考虑采用freeswitch插件的方式进行asr对接,而且freeswitch已经定义好了asr识别的接口,我们只要根据接口去做实现就可以了,主要要实现的接口如下:

/* Create an ASR-type interface on the module interface; every callback below is attached to it. */
asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE);

/* Name given to this ASR interface. */
asr_interface->interface_name = "vosk";

/* Session lifecycle: open and close a recognizer handle. */
asr_interface->asr_open = vosk_asr_open;

asr_interface->asr_close = vosk_asr_close;

/* Grammar hooks. */
asr_interface->asr_load_grammar = vosk_asr_load_grammar;

asr_interface->asr_unload_grammar = vosk_asr_unload_grammar;

/* Pause / resume hooks. */
asr_interface->asr_resume = vosk_asr_resume;

asr_interface->asr_pause = vosk_asr_pause;

/* Audio input and result retrieval. */
asr_interface->asr_feed = vosk_asr_feed;

asr_interface->asr_check_results = vosk_asr_check_results;

asr_interface->asr_get_results = vosk_asr_get_results;

/* Input timer hook. */
asr_interface->asr_start_input_timers = vosk_asr_start_input_timers;

参考了一些资料并熟悉了一下vosk的websocket通信协议后,我自定义了一个mod_vosk的插件模块,主要源码如下:

#include <switch.h>

#include "libks/ks.h"

/* Size in bytes of one audio chunk: vosk_asr_feed() reads this many bytes from the
 * staging buffer and sends them as a single websocket binary frame. It is also passed
 * as both size arguments when the staging buffer is created in vosk_asr_open(). */
#define AUDIO_BLOCK_SIZE 3200

/* Declaration of the module load entry point (defined further down). */
SWITCH_MODULE_LOAD_FUNCTION(mod_vosk_load);

/* Declaration of the module shutdown entry point (defined further down). */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vosk_shutdown);

/* Module definition wiring the load and shutdown functions; the fourth macro argument
 * is NULL (presumably the optional runtime hook -- confirm against the FreeSWITCH headers). */
SWITCH_MODULE_DEFINITION(mod_vosk, mod_vosk_load, mod_vosk_shutdown, NULL);

/* Mutex initialised in mod_vosk_load() and held by do_load() around load_config(). */
static switch_mutex_t *MUTEX = NULL;

/* Event binding for SWITCH_EVENT_RELOADXML: bound in mod_vosk_load(), unbound in mod_vosk_shutdown(). */
static switch_event_node_t *NODE = NULL;

/* Module-wide configuration and shared resources. */
static struct {

/* Websocket URL of the Vosk server. load_config() takes it from the "server-url"
 * parameter and falls back to "ws://127.0.0.1:2700"; vosk_asr_open() uses it when
 * no destination is passed in. */
char *server_url;

/* Set from the "return-json" parameter. When non-zero, vosk_asr_get_results() hands
 * back the stored server message as-is instead of extracting "text" / "partial". */
int return_json;

/* When non-zero, event_handler() reloads the configuration on SWITCH_EVENT_RELOADXML. */
int auto_reload;

/* Module memory pool saved in mod_vosk_load(); configuration strings are duplicated into it. */
switch_memory_pool_t *pool;

/* libks pool opened in mod_vosk_load(), passed to kws_connect_ex(), closed in mod_vosk_shutdown(). */
ks_pool_t *ks_pool;

} globals;

/* Per-handle recognizer state, stored in switch_asr_handle_t::private_info by vosk_asr_open(). */
typedef struct {

/* Websocket connection to the Vosk server; created in vosk_asr_open(), closed and destroyed in vosk_asr_close(). */
kws_t *ws;

/* Copy (switch_safe_strdup) of the most recent message read from the websocket in
 * vosk_asr_feed(); released by vosk_asr_get_results() and vosk_asr_close(). */
char *result;

/* Mutex locked by vosk_asr_close(), vosk_asr_feed() and vosk_asr_get_results() while they touch this state. */
switch_mutex_t *mutex;

/* Staging buffer: fed audio accumulates here until an AUDIO_BLOCK_SIZE chunk can be sent. */
switch_buffer_t *audio_buffer;

} vosk_t;

/*! function to open the asr interface
 *
 * Allocates the per-handle vosk_t state from the handle's memory pool, creates the
 * audio staging buffer and opens a blocking websocket connection to the Vosk server.
 *
 * \param ah    ASR handle; ah->private_info is pointed at the new vosk_t
 * \param codec ignored: the handle codec is always set to "L16"
 * \param rate  not used by this function
 * \param dest  websocket URL; when NULL or empty, globals.server_url is used instead
 * \param flags not used by this function
 * \return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_MEMERR when an allocation
 *         fails, SWITCH_STATUS_GENERR when the websocket connection cannot be opened
 */
static switch_status_t vosk_asr_open(switch_asr_handle_t *ah, const char *codec, int rate, const char *dest, switch_asr_flag_t *flags)
{
	/* Resolve the target once so the connect call and the failure log agree on it. */
	const char *url = (dest && *dest) ? dest : globals.server_url;
	vosk_t *vosk;
	ks_json_t *req;

	if (!(vosk = (vosk_t *) switch_core_alloc(ah->memory_pool, sizeof(*vosk)))) {
		return SWITCH_STATUS_MEMERR;
	}

	ah->private_info = vosk;

	switch_mutex_init(&vosk->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool);

	if (switch_buffer_create_dynamic(&vosk->audio_buffer, AUDIO_BLOCK_SIZE, AUDIO_BLOCK_SIZE, 0) != SWITCH_STATUS_SUCCESS) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Buffer create failed\n");
		return SWITCH_STATUS_MEMERR;
	}

	codec = "L16";
	ah->codec = switch_core_strdup(ah->memory_pool, codec);

	/* The request object is built only now, so the error returns above have nothing to release. */
	if (!(req = ks_json_create_object())) {
		switch_buffer_destroy(&vosk->audio_buffer);
		return SWITCH_STATUS_MEMERR;
	}

	ks_json_add_string_to_object(req, "url", url);

	if (kws_connect_ex(&vosk->ws, req, KWS_BLOCK | KWS_CLOSE_SOCK, globals.ks_pool, NULL, 30000) != KS_STATUS_SUCCESS) {
		ks_json_delete(&req);
		/* The staging buffer is released explicitly in vosk_asr_close(); do the same
		 * here because the open is failing and the handle will not be used. */
		switch_buffer_destroy(&vosk->audio_buffer);
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Websocket connect to %s failed\n", url);
		return SWITCH_STATUS_GENERR;
	}

	ks_json_delete(&req);

	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "ASR open\n");

	return SWITCH_STATUS_SUCCESS;
}

/*! function to close the asr interface
 *
 * While holding the handle mutex: closes and destroys the websocket, marks the handle
 * with SWITCH_ASR_FLAG_CLOSED, and releases the audio buffer and any stored result.
 *
 * \param ah    ASR handle whose private_info is the vosk_t to tear down
 * \param flags not used by this function
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags)
{
	vosk_t *ctx = (vosk_t *) ah->private_info;

	switch_mutex_lock(ctx->mutex);

	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "ASR closed\n");

	/* FIXME: the websocket server still expects us to read the close confirmation
	 * and only then close; the libks library doesn't implement that yet.
	 * NOTE(review): KWS_CLOSE_SOCK is passed as kws_close()'s second argument --
	 * confirm against libks whether that parameter is a flag or a close reason code. */
	kws_close(ctx->ws, KWS_CLOSE_SOCK);
	kws_destroy(&ctx->ws);

	switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED);

	switch_buffer_destroy(&ctx->audio_buffer);
	switch_safe_free(ctx->result);

	switch_mutex_unlock(ctx->mutex);

	return SWITCH_STATUS_SUCCESS;
}

/*! function to feed audio to the ASR
 *
 * Appends the audio to the staging buffer, sends every complete AUDIO_BLOCK_SIZE chunk
 * to the server as a websocket binary frame, then does a non-blocking poll of the
 * socket and reads at most one incoming frame. A ping is answered with a pong; any
 * other frame replaces the stored result string.
 *
 * \param ah    ASR handle whose private_info is the vosk_t state
 * \param data  audio bytes to append
 * \param len   number of bytes in data
 * \param flags not used by this function
 * \return SWITCH_STATUS_SUCCESS when the call completed (with or without a new result),
 *         SWITCH_STATUS_BREAK when the handle is closed or a websocket write/read failed
 */
static switch_status_t vosk_asr_feed(switch_asr_handle_t *ah, void *data, unsigned int len, switch_asr_flag_t *flags)
{
	vosk_t *vosk = (vosk_t *) ah->private_info;
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	kws_opcode_t oc;
	uint8_t *rdata = NULL;
	int poll_result;
	int rlen;

	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) {
		return SWITCH_STATUS_BREAK;
	}

	switch_mutex_lock(vosk->mutex);

	/* vosk_asr_close() sets the flag and destroys the socket and buffer while holding
	 * this mutex, so test again now that we own it. */
	if (switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) {
		status = SWITCH_STATUS_BREAK;
		goto done;
	}

	switch_buffer_write(vosk->audio_buffer, data, len);

	/* Drain every complete block so a large feed cannot pile up in the buffer. */
	while (switch_buffer_inuse(vosk->audio_buffer) >= AUDIO_BLOCK_SIZE) {
		char buf[AUDIO_BLOCK_SIZE];
		int wlen = (int) switch_buffer_read(vosk->audio_buffer, buf, AUDIO_BLOCK_SIZE);

		if (wlen <= 0) {
			break;
		}

		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Sending data %d\n", wlen);

		if (kws_write_frame(vosk->ws, WSOC_BINARY, buf, wlen) < 0) {
			status = SWITCH_STATUS_BREAK;
			goto done;
		}
	}

	/* Zero timeout: only look at what is already waiting on the socket. */
	poll_result = kws_wait_sock(vosk->ws, 0, KS_POLL_READ | KS_POLL_ERROR);

	if (poll_result != KS_POLL_READ) {
		/* Nothing to read right now; that is not an error for the caller. */
		goto done;
	}

	rlen = kws_read_frame(vosk->ws, &oc, &rdata);

	if (rlen < 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Received Read Failed\n");
		status = SWITCH_STATUS_BREAK;
		goto done;
	}

	if (oc == WSOC_PING) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Received ping\n");
		kws_write_frame(vosk->ws, WSOC_PONG, rdata, rlen);
		goto done;
	}

	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Recieved %d bytes:\n%s\n", rlen, rdata);

	/* Keep only the most recent server message. */
	switch_safe_free(vosk->result);
	vosk->result = switch_safe_strdup((const char *)rdata);

done:
	switch_mutex_unlock(vosk->mutex);

	return status;
}

/*! function to pause recognizer
 *
 * No-op: the handle is not touched and success is always reported.
 *
 * \param ah ASR handle (unused)
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_pause(switch_asr_handle_t *ah)
{
	return SWITCH_STATUS_SUCCESS;
}

/*! function to resume recognizer
 *
 * No-op: the handle is not touched and success is always reported.
 *
 * \param ah ASR handle (unused)
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_resume(switch_asr_handle_t *ah)
{
	return SWITCH_STATUS_SUCCESS;
}

/*! Process asr_load_grammar request from FreeSWITCH.
 *
 * No-op: the grammar and name arguments are not used and success is always reported.
 *
 * \param ah      ASR handle (unused)
 * \param grammar grammar passed by the caller (unused)
 * \param name    grammar name (unused)
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name)
{
	return SWITCH_STATUS_SUCCESS;
}

/*! Process asr_unload_grammar request from FreeSWITCH.
 *
 * No-op: the name argument is not used and success is always reported.
 *
 * \param ah   ASR handle (unused)
 * \param name grammar name (unused)
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_unload_grammar(switch_asr_handle_t *ah, const char *name)
{
	return SWITCH_STATUS_SUCCESS;
}

/*! function to check whether a result is available from the ASR
 *
 * A result counts as available when a server message is stored and that message does
 * not contain an empty JSON string ("") anywhere in its text.
 *
 * \param ah    ASR handle whose private_info is the vosk_t state
 * \param flags not used by this function
 * \return SWITCH_STATUS_SUCCESS when a non-empty result is stored, otherwise SWITCH_STATUS_FALSE
 */
static switch_status_t vosk_asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags)
{
	vosk_t *vosk = (vosk_t *) ah->private_info;
	switch_status_t status = SWITCH_STATUS_FALSE;

	/* vosk_asr_feed() and vosk_asr_get_results() free and replace vosk->result while
	 * holding this mutex; take it here too so the string cannot be freed under strstr(). */
	switch_mutex_lock(vosk->mutex);

	if (vosk->result && strstr(vosk->result, "\"\"") == NULL) {
		status = SWITCH_STATUS_SUCCESS;
	}

	switch_mutex_unlock(vosk->mutex);

	return status;
}

/*! function to read results from the ASR
 *
 * Hands the stored server message to the caller and clears it.
 * With globals.return_json set, the raw message is returned; otherwise the message is
 * parsed and the value of its "text" item (or, failing that, "partial") is returned.
 *
 * \param ah     ASR handle whose private_info is the vosk_t state
 * \param xmlstr receives a newly allocated copy of the result (strdup-style allocation),
 *               or NULL when no result could be produced
 * \param flags  not used by this function
 * \return SWITCH_STATUS_SUCCESS for a final result, SWITCH_STATUS_MORE_DATA for a
 *         partial result, SWITCH_STATUS_GENERR when nothing is stored or the message
 *         has neither a "text" nor a "partial" item
 */
static switch_status_t vosk_asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags)
{
	vosk_t *vosk = (vosk_t *) ah->private_info;
	switch_status_t ret = SWITCH_STATUS_GENERR;

	/* Give the caller a defined value on every path. */
	*xmlstr = NULL;

	switch_mutex_lock(vosk->mutex);

	if (!vosk->result) {
		/* Nothing stored: report an error instead of handing NULL to strstr()/the parser. */
	} else if (globals.return_json) {
		*xmlstr = switch_safe_strdup(vosk->result);
		ret = (strstr(vosk->result, "\"partial\"") == NULL) ? SWITCH_STATUS_SUCCESS : SWITCH_STATUS_MORE_DATA;
	} else {
		cJSON *result = cJSON_Parse(vosk->result);

		if (cJSON_HasObjectItem(result, "text")) {
			*xmlstr = switch_safe_strdup(cJSON_GetObjectCstr(result, "text"));
			ret = SWITCH_STATUS_SUCCESS;
		} else if (cJSON_HasObjectItem(result, "partial")) {
			*xmlstr = switch_safe_strdup(cJSON_GetObjectCstr(result, "partial"));
			ret = SWITCH_STATUS_MORE_DATA;
		}

		cJSON_Delete(result);
	}

	/* The stored message is consumed by this call whatever the outcome. */
	switch_safe_free(vosk->result);
	vosk->result = NULL;

	switch_mutex_unlock(vosk->mutex);

	return ret;
}

/*! function to start input timeouts
 *
 * No-op: the handle is not touched and success is always reported.
 *
 * \param ah ASR handle (unused)
 * \return always SWITCH_STATUS_SUCCESS
 */
static switch_status_t vosk_asr_start_input_timers(switch_asr_handle_t *ah)
{
	return SWITCH_STATUS_SUCCESS;
}

/*! Load module settings from vosk.conf into globals.
 *
 * Recognised <param> names inside <settings>:
 *   - "server-url"  : websocket URL of the Vosk server (empty values are ignored)
 *   - "return-json" : integer; non-zero makes vosk_asr_get_results() return the raw message
 *   - "auto-reload" : integer; non-zero makes event_handler() reload this configuration
 *
 * When no server URL ends up configured, "ws://127.0.0.1:2700" is used.
 *
 * \return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the configuration file
 *         could not be opened (the default URL is still applied in that case)
 */
static switch_status_t load_config(void)
{
	const char *cf = "vosk.conf";
	switch_xml_t cfg, xml = NULL, param, settings;
	switch_status_t status = SWITCH_STATUS_SUCCESS;

	if (!(xml = switch_xml_open_cfg(cf, &cfg, NULL))) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open of %s failed\n", cf);
		status = SWITCH_STATUS_FALSE;
		goto done;
	}

	if ((settings = switch_xml_child(cfg, "settings"))) {
		for (param = switch_xml_child(settings, "param"); param; param = param->next) {
			const char *var = switch_xml_attr_soft(param, "name");
			const char *val = switch_xml_attr_soft(param, "value");

			if (!strcasecmp(var, "server-url")) {
				/* An empty value must not mask the built-in default applied below. */
				if (*val) {
					globals.server_url = switch_core_strdup(globals.pool, val);
				}
			} else if (!strcasecmp(var, "return-json")) {
				globals.return_json = atoi(val);
			} else if (!strcasecmp(var, "auto-reload")) {
				/* Without this, globals.auto_reload is never set and the reload
				 * branch in event_handler() can never run. */
				globals.auto_reload = atoi(val);
			}
		}
	}

done:
	if (!globals.server_url) {
		globals.server_url = switch_core_strdup(globals.pool, "ws://127.0.0.1:2700");
	}

	if (xml) {
		switch_xml_free(xml);
	}

	return status;
}

/*! Run load_config() while holding the module MUTEX.
 *
 * The status returned by load_config() is not inspected.
 */
static void do_load(void)
{
	switch_mutex_lock(MUTEX);
	(void) load_config();
	switch_mutex_unlock(MUTEX);
}

/*! Event callback bound to SWITCH_EVENT_RELOADXML in mod_vosk_load().
 *
 * Reloads the configuration and logs the fact, but only when globals.auto_reload is set.
 *
 * \param event the delivered event (its contents are not examined)
 */
static void event_handler(switch_event_t *event)
{
	if (!globals.auto_reload) {
		return;
	}

	do_load();
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Vosk Reloaded\n");
}

SWITCH_MODULE_LOAD_FUNCTION(mod_vosk_load)

{

switch_asr_interface_t *asr_interface;

switch_mutex_init(&MUTEX, SWITCH_MUTEX_NESTED, pool);

globals.pool = pool;

ks_init();

ks_pool_open(&globals.ks_pool);

ks_global_set_default_logger(7);

if ((switch_event_bind_removable(modname, SWITCH_EVENT_RELOADXML, NULL, event_handler, NULL, &NODE) != SWITCH_STATUS_SUCCESS)) {

switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't bind!\n");

}

do_load();

/* connect my internal structure to the blank pointer passed to me */

*module_interface = switch_loadable_module_create_module_interface(pool, modname);

asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE);

asr_interface->interface_name = "vosk";

asr_interface->asr_open = vosk_asr_open;

asr_interface->asr_close = vosk_asr_close;

asr_interface->asr_load_grammar = vosk_asr_load_grammar;

asr_interface->asr_unload_grammar = vosk_asr_unload_grammar;

asr_interface->asr_resume = vosk_asr_resume;

asr_interface->asr_pause = vosk_asr_pause;

asr_interface->asr_feed = vosk_asr_feed;

asr_interface->asr_check_results = vosk_asr_check_results;

asr_interface->asr_get_results = vosk_asr_get_results;

asr_interface->asr_start_input_timers = vosk_asr_start_input_timers;

/* indicate that the module should continue to be loaded */

return SWITCH_STATUS_SUCCESS;

}

/*! Module shutdown entry point.
 *
 * Closes the libks pool opened at load time, shuts libks down and removes the
 * RELOADXML event binding.
 *
 * \return always SWITCH_STATUS_UNLOAD
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vosk_shutdown)
{
	/* Release libks resources first, in the reverse order of their creation. */
	ks_pool_close(&globals.ks_pool);
	ks_shutdown();

	/* Drop the event binding created in mod_vosk_load(). */
	switch_event_unbind(&NODE);

	return SWITCH_STATUS_UNLOAD;
}

好啦,注册一个分机测试一下,配置一个dialplan如下:

效果杠杠的,重点是免费!

mod的源码已发布到github上,地址shanghaimoon888/mod_voskasr (github.com),有兴趣的朋友可以自行下载,有问题也可以加QQ:1869731探讨!

相关链接

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: