思路:
前端录音生成 wav 文件,上传后由后端解析。
技术:
后端:
Vosk是一个离线开源语音识别工具。它可以识别16种语言,包括中文。
它提供简洁的 API 接口,只需几行代码即可免费调用、体验语音识别功能。
目前支持 WAV声音文件格式,支持中英文等18种语言。
前端:
js-audio-recorder 录音组件
资料:
下载vosk语言模型:
springboot整合vosk实现简单的语音识别功能
javaswing窗体
问题:
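浏览器只允许在安全上下文(https 或 localhost)中调用麦克风,所以录音组件会要求使用 https 协议:生产环境必须将 http 转为 https;测试环境有以下两种方法:第一种,只在 http://localhost:项目端口 下使用录音;第二种,在谷歌浏览器中进行配置,让该网站具有使用录音的权限(例如通过 chrome://flags/#unsafely-treat-insecure-origin-as-secure 把该地址加入白名单)。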
后端:
依赖:
<!-- Speech recognition -->
<!-- Reading audio file metadata -->
<dependency>
    <groupId>org</groupId>
    <artifactId>jaudiotagger</artifactId>
    <version>2.0.3</version>
</dependency>
<dependency>
    <groupId>net.java.dev.jna</groupId>
    <artifactId>jna</artifactId>
    <version>5.13.0</version>
</dependency>
<dependency>
    <groupId>com.alphacephei</groupId>
    <artifactId>vosk</artifactId>
    <version>0.3.45</version>
</dependency>
<!-- JAVE2 (Java Audio Video Encoder) is a Java wrapper around the ffmpeg project. -->
<dependency>
    <groupId>ws.schild</groupId>
    <artifactId>jave-core</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- Native ffmpeg binaries for developing on Windows (32-bit and 64-bit) -->
<dependency>
    <groupId>ws.schild</groupId>
    <artifactId>jave-nativebin-win32</artifactId>
    <version>3.1.1</version>
</dependency>
<dependency>
    <groupId>ws.schild</groupId>
    <artifactId>jave-nativebin-win64</artifactId>
    <version>3.1.1</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.18.24</version>
</dependency>
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <scope>test</scope>
</dependency>
代码(我会按代码的调用顺序展示)
package com.rouyi.yuyin.model;public class VoskResult {private String text;public String getText() {return text;}public void setText(String text) {this.text = text;}}
vosk模型加载(将你从官网下的语言模型放到你的项目中并解压,修改下面的modelStr的值)
package com.rouyi.yuyin.model;import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;import java.io.IOException;/*** vosk模型加载* @author zhou*/
public class VoskModel {/*** 3. 使用 volatile 保证线程安全* 禁止指令重排* 保证可见性* 不保证原子性*/private static volatile VoskModel instance;private Model voskModel;public Model getVoskModel() {return voskModel;}/*** 1.私有构造函数*/private VoskModel() {System.out.println("SingleLazyPattern实例化了");//String modelStr = "D:\\work\\project\\fjdci-vosk\\src\\main\\resources\\vosk-model-small-cn-0.22";String modelStr = "H:\\afterProject\\qiyedianzixuke\\RuoYi-Cloud\\ruoyi-modules\\yuyinshibie\\src\\main\\resources\\lib\\vosk-model-cn-0.22";try {voskModel = new Model(modelStr);LibVosk.setLogLevel(LogLevel.INFO);} catch (IOException e) {e.printStackTrace();}}/*** 2.通过静态方法获取一个唯一实例* DCL 双重检查锁定 (Double-CheckedLocking)* 在多线程情况下保持⾼性能*/public static VoskModel getInstance() {if (instance == null) {synchronized (VoskModel.class) {if (instance == null) {// 1. 分配内存空间 2、执行构造方法,初始化对象 3、把这个对象指向这个空间instance = new VoskModel();}}}return instance;}/*** 多线程测试加载* @param args*/public static void main(String[] args) {for (int i = 0; i < 5; i++) {new Thread(() -> {VoskModel.getInstance();}).start();}}}
package com.rouyi.yuyin.model;import ws.schild.jave.Encoder;
import ws.schild.jave.EncoderException;
import ws.schild.jave.InputFormatException;
import ws.schild.jave.MultimediaObject;
import ws.schild.jave.encode.AudioAttributes;
import ws.schild.jave.encode.EncodingAttributes;
import ws.schild.jave.info.AudioInfo;
import ws.schild.jave.info.MultimediaInfo;import java.io.File;public class Jave2Util {/*** @param src 来源文件路径* @param target 目标文件路径* @param offset 设置起始偏移量(秒)* @param duration 设置切片的音频长度(秒)* @throws EncoderException*/public static void cut(String src, String target, Float offset, Float duration) throws EncoderException {File targetFile = new File(target);if (targetFile.exists()) {targetFile.delete();}File srcFile = new File(src);MultimediaObject srcMultiObj = new MultimediaObject(srcFile);MultimediaInfo srcMediaInfo = srcMultiObj.getInfo();Encoder encoder = new Encoder();EncodingAttributes encodingAttributes = new EncodingAttributes();//设置起始偏移量(秒)encodingAttributes.setOffset(offset);//设置切片的音频长度(秒)encodingAttributes.setDuration(duration);// 输入格式encodingAttributes.setInputFormat("wav");//设置音频属性AudioAttributes audio = new AudioAttributes();audio.setBitRate(srcMediaInfo.getAudio().getBitRate());//audio.setSamplingRate(srcMediaInfo.getAudio().getSamplingRate());// 转换为16KHZ 满足vosk识别的标准audio.setSamplingRate(16000);audio.setChannels(srcMediaInfo.getAudio().getChannels());//如果截取的时候,希望同步调整编码,可以设置不同的编码
// audio.setCodec("pcm_u8");//audio.setCodec(srcMediaInfo.getAudio().getDecoder().split(" ")[0]);encodingAttributes.setAudioAttributes(audio);//写文件encoder.encode(srcMultiObj, new File(target), encodingAttributes);}/*** 转化音频格式** @param oldFormatPath : 原音乐路径* @param newFormatPath : 目标音乐路径* @return*/public static boolean transforMusicFormat(String oldFormatPath, String newFormatPath) {File source = new File(oldFormatPath);File target = new File(newFormatPath);// 音频转换格式类Encoder encoder = new Encoder();// 设置音频属性AudioAttributes audio = new AudioAttributes();audio.setCodec(null);// 设置转码属性EncodingAttributes attrs = new EncodingAttributes();attrs.setInputFormat("wav");attrs.setAudioAttributes(audio);try {encoder.encode(new MultimediaObject(source), target, attrs);System.out.println("传唤已完成...");return true;} catch (IllegalArgumentException e) {e.printStackTrace();} catch (InputFormatException e) {e.printStackTrace();} catch (EncoderException e) {e.printStackTrace();}return false;}public static void main(String[] args) throws EncoderException {String src = "D:\\fjFile\\annex\\xwbl\\ly8603f22f24e0409fa9747d50a78ff7e5.wav";String target = "D:\\fjFile\\annex\\xwbl\\tem_2.wav";Jave2Util.cut(src, target, 0.0F, 60.0F);String inputFormatPath = "D:\\fjFile\\annex\\xwbl\\ly8603f22f24e0409fa9747d50a78ff7e5.m4a";String outputFormatPath = "D:\\fjFile\\annex\\xwbl\\ly8603f22f24e0409fa9747d50a78ff7e5.wav";info(inputFormatPath);// audioEncode(inputFormatPath, outputFormatPath);}/*** 获取音频文件的编码信息** @param filePath* @throws EncoderException*/private static void info(String filePath) throws EncoderException {File file = new File(filePath);MultimediaObject multimediaObject = new MultimediaObject(file);MultimediaInfo info = multimediaObject.getInfo();// 时长long duration = info.getDuration();String format = info.getFormat();// format:movSystem.out.println("format:" + format);AudioInfo audio = info.getAudio();// 它设置将在重新编码的音频流中使用的音频通道数(1 =单声道,2 =立体声)。如果未设置任何通道值,则编码器将选择默认值。int channels = 
audio.getChannels();// 它为新的重新编码的音频流设置比特率值。如果未设置比特率值,则编码器将选择默认值。// 该值应以每秒位数表示。例如,如果您想要128 kb / s的比特率,则应调用setBitRate(new Integer(128000))。int bitRate = audio.getBitRate();// 它为新的重新编码的音频流设置采样率。如果未设置采样率值,则编码器将选择默认值。该值应以赫兹表示。例如,如果您想要类似CD// 采样率、音频采样级别 16000 = 16KHzint samplingRate = audio.getSamplingRate();// 设置音频音量// 可以调用此方法来更改音频流的音量。值为256表示音量不变。因此,小于256的值表示音量减小,而大于256的值将增大音频流的音量。// setVolume(Integer volume)String decoder = audio.getDecoder();System.out.println("声音时长:毫秒" + duration);System.out.println("声道:" + channels);System.out.println("bitRate:" + bitRate);System.out.println("samplingRate 采样率、音频采样级别 16000 = 16KHz:" + samplingRate);// aac (LC) (mp4a / 0x6134706D)System.out.println("decoder:" + decoder);}/*** 音频格式转换* @param inputFormatPath* @param outputFormatPath* @return*/public static boolean audioEncode(String inputFormatPath, String outputFormatPath) {String outputFormat = getSuffix(outputFormatPath);String inputFormat = getSuffix(inputFormatPath);File source = new File(inputFormatPath);File target = new File(outputFormatPath);try {MultimediaObject multimediaObject = new MultimediaObject(source);// 获取音频文件的编码信息MultimediaInfo info = multimediaObject.getInfo();AudioInfo audioInfo = info.getAudio();//设置音频属性AudioAttributes audio = new AudioAttributes();audio.setBitRate(audioInfo.getBitRate());audio.setSamplingRate(audioInfo.getSamplingRate());audio.setChannels(audioInfo.getChannels());// 设置转码属性EncodingAttributes attrs = new EncodingAttributes();attrs.setInputFormat(inputFormat);attrs.setOutputFormat(outputFormat);attrs.setAudioAttributes(audio);// 音频转换格式类Encoder encoder = new Encoder();// 进行转换encoder.encode(new MultimediaObject(source), target, attrs);return true;} catch (IllegalArgumentException | EncoderException e) {e.printStackTrace();}return false;}/*** 获取文件路径的.后缀* @param outputFormatPath* @return*/private static String getSuffix(String outputFormatPath) {return outputFormatPath.substring(outputFormatPath.lastIndexOf(".") + 1);}}
将 wavFilePath 的值修改为你的 wav 格式文件所在的路径(wav 文件可以自行使用手机自带的录音功能生成),最后运行 main 方法就可以测试了。如果需要与前端对接,请自行改造成接口。
package com.rouyi.yuyin.model;import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.vosk.Model;
import org.vosk.Recognizer;
import ws.schild.jave.EncoderException;
import ws.schild.jave.MultimediaObject;
import ws.schild.jave.info.AudioInfo;
import ws.schild.jave.info.MultimediaInfo;import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;@Slf4j
@Component
public class VoiceUtil {public static void main(String[] args) throws EncoderException {String wavFilePath = "H:\\afterProject\\qiyedianzixuke\\RuoYi-Cloud\\ruoyi-modules\\yuyinshibie\\src\\main\\resources\\audio\\11月7日 下午4点10分.wav";// 秒long cutDuration = 20;String waveForm = acceptWaveForm( wavFilePath, cutDuration);System.out.println( waveForm );}/*** 对Wav格式音频文件进行语音识别翻译** @param wavFilePath* @param cutDuration* @return* @throws EncoderException*/private static String acceptWaveForm(String wavFilePath, long cutDuration) throws EncoderException {// 判断视频的长度long startTime = System.currentTimeMillis();MultimediaObject multimediaObject = new MultimediaObject(new File(wavFilePath));MultimediaInfo info = multimediaObject.getInfo();// 时长/毫秒long duration = info.getDuration();AudioInfo audio = info.getAudio();// 通道数int channels = audio.getChannels();// 秒long offset = 0;long forNum = (duration / 1000) / cutDuration;if (duration % (cutDuration * 1000) > 0) {forNum = forNum + 1;}// 进行切块处理List<String> strings = cutWavFile(wavFilePath, cutDuration, offset, forNum);// 循环进行翻译StringBuilder result = new StringBuilder();for (String string : strings) {File f = new File(string);result.append(VoiceUtil.getRecognizerResult(f, channels));}long endTime = System.currentTimeMillis();String msg = "耗时:" + (endTime - startTime) + "ms";System.out.println(msg);return result.toString();}/*** 对wav进行切块处理** @param wavFilePath 处理的wav文件路径* @param cutDuration 切割的固定长度/秒* @param offset 设置起始偏移量(秒)* @param forNum 切块的次数* @return* @throws EncoderException*/private static List<String> cutWavFile(String wavFilePath, long cutDuration, long offset, long forNum) throws EncoderException {UUID uuid = UUID.randomUUID();// 大文件切割为固定时长的小文件List<String> strings = new ArrayList<>();for (int i = 0; i < forNum; i++) {String target = "D:\\fjFile\\annex\\xwbl\\" + uuid + "\\" + i + ".wav";Float offsetF = Float.valueOf(String.valueOf(offset));Float cutDurationF = 
Float.valueOf(String.valueOf(cutDuration));Jave2Util.cut(wavFilePath, target, offsetF, cutDurationF);offset = offset + cutDuration;strings.add(target);}return strings;}/*** 进行翻译** @param f* @param channels*/public static String getRecognizerResult(File f, int channels) {StringBuilder result = new StringBuilder();Model voskModel = VoskModel.getInstance().getVoskModel();// 采样率为音频采样率的声道倍数log.info("====加载完成,开始分析====");try (Recognizer recognizer = new Recognizer(voskModel, 16000 * channels);InputStream ais = new FileInputStream(f)) {int nbytes;byte[] b = new byte[4096];while ((nbytes = ais.read(b)) >= 0) {if (recognizer.acceptWaveForm(b, nbytes)) {// 返回语音识别结果result.append(getResult(recognizer.getResult()));}}// 返回语音识别结果。和结果一样,但不要等待沉默。你通常在流的最后调用它来获得音频的最后部分。它刷新功能管道,以便处理所有剩余的音频块。result.append(getResult(recognizer.getFinalResult()));log.info("识别结果:{}", result.toString());} catch (Exception e) {e.printStackTrace();}return result.toString();}/*** 获取返回结果** @param result* @return*/private static String getResult(String result) {VoskResult vr=JSON.parseObject(result,VoskResult.class);return Optional.ofNullable(vr).map(VoskResult::getText).orElse("");}}
vue:
这里呢,我前端也没整完。前端这里生成录音后传给后台就可以了,后台用上面的 Java 代码一解析就行。别说我懒,做东西还不做完美——想啥呢,搬砖很累的,哈哈!!!
<template>
  <div style="padding: 20px;">
    <h3>录音上传</h3>
    <div style="font-size:14px">
      <h3>录音时长:{{ recorder && recorder.duration.toFixed(4) }}</h3>
      <br />
      <el-button type="primary" @click="handleStart">开始录音</el-button>
      <el-button type="info" @click="handlePause">暂停录音</el-button>
      <el-button type="success" @click="handleResume">继续录音</el-button>
      <el-button type="warning" @click="handleStop">停止录音</el-button>
      <!-- "danger" is the Element button type for destructive actions; "error" is not a button type -->
      <el-button type="danger" @click="handleDestroy">销毁录音</el-button>
      <el-button type="primary" @click="uploadRecord">上传</el-button>
      <!--
      <br />
      <br />
      <h3>播放时长:{{ recorder && (playTime > recorder.duration ? recorder.duration.toFixed(4) : playTime.toFixed(4)) }}</h3>
      <br />
      <el-button type="primary" @click="handlePlay">播放录音</el-button>
      <el-button type="info" @click="handlePausePlay">暂停播放</el-button>
      <el-button type="success" @click="handleResumePlay">继续播放</el-button>
      <el-button type="warning" @click="handleStopPlay">停止播放</el-button>
      <el-button type="danger" @click="handleDestroy">销毁录音</el-button>
      <el-button type="primary" @click="uploadRecord">上传</el-button>
      -->
    </div>
  </div>
</template>

<script>
import Recorder from 'js-audio-recorder'

// Records audio in the browser, optionally plays it back, and packs the
// recording as a WAV file into a FormData ready to be posted to the backend.
export default {
  data() {
    return {
      recorder: null, // js-audio-recorder instance
      playTime: 0, // current playback position shown in the template
      timer: null, // id of the interval that refreshes playTime, or null
      src: null // object URL of the last packed recording
    }
  },
  created() {
    this.recorder = new Recorder()
  },
  methods: {
    // Ask for microphone permission, then start a fresh recording.
    handleStart() {
      this.recorder = new Recorder()
      Recorder.getPermission().then(() => {
        console.log('开始录音')
        this.recorder.start()
      }, (error) => {
        this.$message({
          message: '请先允许该网页使用麦克风',
          type: 'info'
        })
        console.log(`${error.name} : ${error.message}`)
      })
    },
    handlePause() {
      console.log('暂停录音')
      this.recorder.pause()
    },
    handleResume() {
      console.log('恢复录音')
      this.recorder.resume()
    },
    handleStop() {
      console.log('停止录音')
      this.recorder.stop()
    },
    handlePlay() {
      console.log('播放录音')
      console.log(this.recorder)
      this.recorder.play()
      this.startPlayTimer()
    },
    handlePausePlay() {
      console.log('暂停播放')
      this.recorder.pausePlay()
      this.playTime = this.recorder.getPlayTime()
      this.clearPlayTimer()
    },
    handleResumePlay() {
      console.log('恢复播放')
      this.recorder.resumePlay()
      this.startPlayTimer()
    },
    handleStopPlay() {
      console.log('停止播放')
      this.recorder.stopPlay()
      this.playTime = this.recorder.getPlayTime()
      this.clearPlayTimer()
    },
    handleDestroy() {
      console.log('销毁实例')
      this.recorder.destroy()
      this.clearPlayTimer()
    },
    // Refresh playTime every 100 ms. Any previous interval is stopped first so
    // that repeated clicks do not stack timers.
    startPlayTimer() {
      this.clearPlayTimer()
      this.timer = setInterval(() => {
        try {
          this.playTime = this.recorder.getPlayTime()
        } catch (error) {
          this.clearPlayTimer()
        }
      }, 100)
    },
    // Stop the refresh interval. Setting the id to null alone would leave the
    // interval running, so it is cleared explicitly.
    clearPlayTimer() {
      if (this.timer !== null) {
        clearInterval(this.timer)
        this.timer = null
      }
    },
    // Pack the recording as a named WAV file and prepare it for upload.
    uploadRecord() {
      if (this.recorder == null || this.recorder.duration === 0) {
        this.$message({
          message: '请先录音',
          type: 'error'
        })
        return false
      }
      this.recorder.pause()
      this.clearPlayTimer()
      console.log('上传录音')

      const formData = new FormData()
      const blob = this.recorder.getWAVBlob() // recording as WAV data
      // This project needs a file name on the upload; other projects can
      // append the blob to the FormData directly.
      const newbolb = new Blob([blob], { type: 'audio/wav' })
      const fileOfBlob = new File([newbolb], new Date().getTime() + '.wav')
      formData.append('file', fileOfBlob)

      // Release the URL of the previous recording before creating a new one.
      if (this.src) {
        window.URL.revokeObjectURL(this.src)
      }
      const url = window.URL.createObjectURL(fileOfBlob)
      this.src = url

      // TODO: post formData to the backend recognition endpoint, e.g.
      // const axios = require('axios')
      // axios.post(url, formData).then(res => {
      //   console.log(res.data.data[0].url)
      // })
    }
  }
}
</script>