解决方案
最近要实现语音转文字接口,具体需求是:录制语音后,在聊天室实时显示识别出的文字。
目前腾讯云提供三种解决方案:实时语音识别、一句话识别和录音文件识别。
实时语音识别的使用场景是一边录语音、一边转文字,实时性较高;一句话识别是对60s以内的语音文件或语音链接进行转换,然后返回识别出的文本;录音文件识别则用于识别较大的语音文件。
经过接口调用测试,一句话识别的速度很快;再对比实时语音识别接口的调用费用,最终确定使用一句话识别接口。
这里的公共请求参数是调用腾讯云接口时共用的参数,具体的构造和签名方法可参考腾讯云官方文档中关于“公共参数”和“签名方法”的说明。
功能实现
/*** <p>* 语音转文字表 服务实现类* </p>** @author* @since 2020-05-11*/
@Service
@Transactional
@Slf4j
@DS("slave")
public class VoiceToTextServiceImpl extends ServiceImpl<VoiceToTextMapper, VoiceToText> implements IVoiceToTextService {@Value("${tencent.secretId}")private String secretId;@Value("${tencent.secretKey}")private String secretKey;@Autowiredprivate VoiceToTextMapper voiceToTextMapper;@Overridepublic ResultVO<?> searchVoiceText(VoiceToTextDTO dto) throws Exception {// TreeMap可以自动排序TreeMap<String, Object> params = new TreeMap<String, Object>();// 公共参数params.put("Nonce", new Random().nextInt(Integer.MAX_VALUE));params.put("Timestamp", System.currentTimeMillis() / 1000);params.put("SecretId", secretId);params.put("Action", "SentenceRecognition");params.put("Version", "2019-06-14");params.put("Region", "ap-shanghai");// 业务参数params.put("ProjectId", 0);params.put("SubServiceType", 2);params.put("EngSerViceType", "16k");params.put("SourceType", 0);params.put("VoiceFormat", dto.getVoiceFormat());String tempNo = CodeNoEnum.VOICE_TO_TEXT.getTableNO() + CommentUtil.createNo();params.put("UsrAudioKey", tempNo);params.put("Url", dto.getVoiceUrl());// 公共参数params.put("Signature", TencentCloudVoiceUtil.sign(TencentCloudVoiceUtil.getStringToSign(params), secretKey, "HmacSHA1"));String url = TencentCloudVoiceUtil.getUrl(params);String getResponse = HttpUtil.get(url);log.info(getResponse);JSONObject parse = JSONObject.parseObject(getResponse);JSONObject response = parse.getJSONObject("Response");String result = response.getString("Result");String errorResult = response.getString("Error");log.info("Error:"+errorResult);log.info("语音识别结果:"+result);if (errorResult != null) {return ResultVOUtil.returnFail(-1,"文件识别失败!");}VoiceToText entity = new VoiceToText();entity.setNo(tempNo);entity.setVoiceUrl(dto.getVoiceUrl());entity.setUrl(url);entity.setVoiceText(result);voiceToTextMapper.insert(entity);return ResultVOUtil.returnSuccess("voiceText",result);}}