1.先注册百度开放平台的账号,然后按文档申请试用资格及创建应用:https://ai.baidu.com/ai-doc/SPEECH/qknh9i8ed
Windows平台选择“不需要”就可以了。
2.下载C# SDK包
解压以后里面包含这两个文件夹,我工程里面用的是net45,将文件夹放入unity Assets/Plugins文件夹下:
对应unity的Api Compatibility Level属性设置要改为.NET 4.X,设置路径为PlayerSetting-OtherSetting-Configuration-Api Compatibility Level
至此百度语音SDK导入完毕。目前我自己总结了两种实现语音识别的方式:
第一种是用unity的UnityWebRequest去实现,是看的一位博主的教程,链接地址是:Unity百度语音识别-CSDN博客
第二种是看官方文档以后获取SDK的接口来实现的,具体实现方式如下:
在Git上下载了SDK源码进行学习,代码里面有具体功能实现的对应接口,源码地址:GitHub - Baidu-AIP/dotnet-sdk: 百度AI开放平台 .Net SDK
下载完毕解压后如下图,从Git的说明文档上可知speech文件夹中即是语音识别的代码:
在Asr类中,找到“识别语音数据”的方法接口JObject Recognize
byte[] data:音频数据;
string format:音频格式;
int rate:采样频率;
options:语言类型,默认为1537普通话,还支持粤语、四川话、英语等,具体看官方文档介绍。
1.进行录音采集保存
/// <summary>
/// Logs the start marker and begins a non-looping microphone recording,
/// storing the clip returned by the microphone in saveAudioClip.
/// </summary>
void StartRecord()
{
    Debug.LogError("开始");

    // Second argument is the loop flag: false means the recording does not wrap around.
    AudioClip recording = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
    saveAudioClip = recording;
}
2.对录音进行转换,将音频数据转换为byte数组(16位PCM)
/// <summary>
/// Converts an AudioClip to raw 16-bit little-endian mono PCM bytes.
/// </summary>
/// <param name="audioClip">Clip to convert; must not be null.</param>
/// <param name="sampleCount">
/// Number of sample frames (per channel) to convert, counted from the start of the clip.
/// A negative value, or a value larger than the clip, converts the whole clip.
/// </param>
/// <returns>Two bytes per sample frame; an empty array when there is nothing to convert.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="audioClip"/> is null.</exception>
public byte[] ConvertClipToBytes(AudioClip audioClip, int sampleCount = -1)
{
    if (audioClip == null)
    {
        throw new ArgumentNullException(nameof(audioClip));
    }

    int channels = audioClip.channels;
    int frames = audioClip.samples;
    if (sampleCount >= 0 && sampleCount < frames)
    {
        frames = sampleCount;
    }

    if (frames == 0 || channels == 0)
    {
        return new byte[0];
    }

    // AudioClip.samples is a per-channel count and GetData returns interleaved data,
    // so the buffer must hold frames * channels values.
    float[] samples = new float[frames * channels];
    audioClip.GetData(samples, 0);

    const int rescaleFactor = 32767;
    byte[] bytesData = new byte[frames * 2];
    for (int frame = 0; frame < frames; frame++)
    {
        // Average the channels of this frame down to a single mono sample.
        float sum = 0f;
        for (int channel = 0; channel < channels; channel++)
        {
            sum += samples[frame * channels + channel];
        }

        // Clamp first so a sample slightly outside [-1, 1] cannot overflow the short.
        float mono = Mathf.Clamp(sum / channels, -1f, 1f);
        short value = (short)(mono * rescaleFactor);

        // Write low byte first (little-endian) regardless of platform byte order.
        bytesData[frame * 2] = (byte)(value & 0xFF);
        bytesData[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
    }

    return bytesData;
}
3.转换完毕通过SDK接口发送
// Convert the recorded clip to PCM bytes first, then hand them to the SDK recognizer.
byte[] pcmBytes = ConvertClipToBytes(saveAudioClip);
var result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
4.将返回的数据转换为字符串,并用正则表达式提取自己想要的信息
// Serialize the SDK response to compact JSON so it can be searched with regular expressions.
string str = JsonConvert.SerializeObject(result, Formatting.None);
if (!string.IsNullOrEmpty(str))
{
    bool succeeded = Regex.IsMatch(str, @"err_msg.:.success");
    if (!succeeded)
    {
        // Any response without a success message is reported as an empty result.
        str = "识别结果为空";
    }
    else
    {
        // Group 1 captures the text inside the "result" array; when the pattern
        // does not match, str keeps the full JSON string.
        Match textMatch = Regex.Match(str, "result.:..(.*?)..]");
        if (textMatch.Success)
        {
            str = textMatch.Groups[1].ToString();
        }
    }

    tex.text = str;
}
完整代码如下:
偷懒写的代码不牢固,result在获取的时候有可能会出现NULL值,所以你们可以用Try Catch做一下容错
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Baidu.Aip.Speech;
using Newtonsoft.Json.Linq;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;
using Newtonsoft.Json;

/// <summary>
/// Push-to-talk speech recognition demo: hold Space to record from the microphone,
/// release it to send the recording to Baidu ASR through the .NET SDK and show the
/// recognized text in the "Canvas/ResultTex" Text component.
/// </summary>
public class Test : MonoBehaviour
{
    // Credentials passed to the Asr constructor in Start.
    public string app_id;
    public string api_key;
    public string secret_Key;
    public Asr asr;

    // Text shown whenever no recognition result is available.
    const string EmptyResultText = "识别结果为空";

    int recordFrequency = 8000; // recording sample rate in Hz
    int recordMaxTime = 20;     // maximum recording length in seconds
    AudioClip saveAudioClip;    // clip holding the current recording
    AudioSource source;
    string currentDeviceName = string.Empty; // passed as the device name to every Microphone call
    Text tex;
    Dictionary<string, object> languageType = new Dictionary<string, object>();

    // Start is called before the first frame update
    void Start()
    {
        // saveAudioClip is not fetched here: AudioClip is an asset type, not a Component,
        // so it cannot be obtained through GetComponent. Microphone.Start creates it.
        source = GetComponent<AudioSource>();
        tex = GameObject.Find("Canvas/ResultTex").GetComponent<Text>();
        asr = new Asr(app_id, api_key, secret_Key);
        languageType.Add("dev_pid", 1537);
    }

    // Update is called once per frame
    void Update()
    {
        if (Input.GetKeyDown(KeyCode.Space))
        {
            StartRecord();
        }
        else if (Input.GetKeyUp(KeyCode.Space))
        {
            EndRecord();
        }
    }

    /// <summary>
    /// Converts an AudioClip to raw 16-bit little-endian mono PCM bytes.
    /// </summary>
    /// <param name="audioClip">Clip to convert; must not be null.</param>
    /// <param name="sampleCount">
    /// Number of sample frames (per channel) to convert, counted from the start of the clip.
    /// A negative value, or a value larger than the clip, converts the whole clip.
    /// </param>
    /// <returns>Two bytes per sample frame; an empty array when there is nothing to convert.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="audioClip"/> is null.</exception>
    public byte[] ConvertClipToBytes(AudioClip audioClip, int sampleCount = -1)
    {
        if (audioClip == null)
        {
            throw new ArgumentNullException(nameof(audioClip));
        }

        int channels = audioClip.channels;
        int frames = audioClip.samples;
        if (sampleCount >= 0 && sampleCount < frames)
        {
            frames = sampleCount;
        }

        if (frames == 0 || channels == 0)
        {
            return new byte[0];
        }

        // AudioClip.samples is a per-channel count and GetData returns interleaved data,
        // so the buffer must hold frames * channels values.
        float[] samples = new float[frames * channels];
        audioClip.GetData(samples, 0);

        const int rescaleFactor = 32767;
        byte[] bytesData = new byte[frames * 2];
        for (int frame = 0; frame < frames; frame++)
        {
            // Average the channels of this frame down to a single mono sample.
            float sum = 0f;
            for (int channel = 0; channel < channels; channel++)
            {
                sum += samples[frame * channels + channel];
            }

            // Clamp first so a sample slightly outside [-1, 1] cannot overflow the short.
            float mono = Mathf.Clamp(sum / channels, -1f, 1f);
            short value = (short)(mono * rescaleFactor);

            // Write low byte first (little-endian) regardless of platform byte order.
            bytesData[frame * 2] = (byte)(value & 0xFF);
            bytesData[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }

        return bytesData;
    }

    /// <summary>
    /// Starts a non-looping microphone recording of at most recordMaxTime seconds.
    /// </summary>
    void StartRecord()
    {
        Debug.LogError("开始");
        saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
    }

    /// <summary>
    /// Stops the recording, plays it back, sends the recorded part to the recognizer
    /// and displays the recognized text (or the empty-result message on any failure).
    /// </summary>
    void EndRecord()
    {
        Debug.LogError("结束");

        // The write position has to be read before Microphone.End. If the microphone is
        // no longer recording (presumably because recordMaxTime elapsed - TODO confirm),
        // -1 makes the conversion use the whole clip.
        int recordedSamples = Microphone.IsRecording(currentDeviceName)
            ? Microphone.GetPosition(currentDeviceName)
            : -1;
        Microphone.End(currentDeviceName);

        // No clip (Microphone.Start returned nothing) or nothing captured yet.
        if (saveAudioClip == null || recordedSamples == 0)
        {
            tex.text = EmptyResultText;
            return;
        }

        source.PlayOneShot(saveAudioClip);

        string str;
        try
        {
            byte[] pcmBytes = ConvertClipToBytes(saveAudioClip, recordedSamples);

            // NOTE(review): Recognize looks like a synchronous call, so this frame waits
            // for the whole request - consider moving it off the main thread.
            JObject result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
            str = ExtractRecognizedText(result);
        }
        catch (Exception e)
        {
            // Log the failure and fall back to the empty-result message instead of
            // letting the exception escape from Update.
            Debug.LogException(e);
            str = EmptyResultText;
        }

        tex.text = str;
    }

    /// <summary>
    /// Reads the recognized text out of the SDK response.
    /// </summary>
    /// <param name="response">Response object returned by Asr.Recognize; may be null.</param>
    /// <returns>
    /// The first entry of the "result" array without trailing sentence punctuation,
    /// or the empty-result message when the response is missing, unsuccessful or empty.
    /// </returns>
    static string ExtractRecognizedText(JObject response)
    {
        if (response == null)
        {
            return EmptyResultText;
        }

        // Same success criterion as the earlier regex: err_msg must start with "success".
        string errMsg = (string)response["err_msg"];
        if (errMsg == null || !errMsg.StartsWith("success", StringComparison.Ordinal))
        {
            return EmptyResultText;
        }

        JArray candidates = response["result"] as JArray;
        if (candidates == null || candidates.Count == 0)
        {
            return EmptyResultText;
        }

        string text = (string)candidates[0];
        if (string.IsNullOrEmpty(text))
        {
            return EmptyResultText;
        }

        // The earlier regex always dropped the last character of the text, presumably to
        // remove a trailing punctuation mark. Trimming only punctuation keeps that result
        // without truncating text that has no trailing mark.
        return text.TrimEnd('。', ',', '?', '!', '.', ',', '?', '!');
    }
}