java版阿里云,百度ai,讯飞语音识别效果简单对比及demo
来源:互联网 发布:mac os内存清理命令 编辑:程序博客网 时间:2024/04/27 14:21
因为公司的业务的需要,对三家的语音识别(简短语句识别java版)进行了调用和对比,把自己的测试成果贴出来供需要的人参考使用.并贴出主要代码块
阿里云的一句话识别:
package com.alibaba.idst.nls.demo;import java.io.File;import java.io.FileInputStream;import java.io.InputStream;import com.alibaba.idst.nls.NlsClient;import com.alibaba.idst.nls.NlsFuture;import com.alibaba.idst.nls.event.NlsEvent;import com.alibaba.idst.nls.event.NlsListener;import com.alibaba.idst.nls.protocol.NlsRequest;import com.alibaba.idst.nls.protocol.NlsResponse;public class AsrDemo implements NlsListener { private static NlsClient client = new NlsClient(); private String akId; private String akSecret; public AsrDemo(String akId, String akSecret) { System.out.println("init Nls client..."); this.akId = akId; this.akSecret = akSecret; // 初始化NlsClient client.init(); } public void startAsr() { // 开始发送语音 System.out.println("open audio file..."); InputStream fis = null; try { //不知道为什么用官方demo的类加载取不到fis,后来又自己写的,求解. // fis = this.getClass().getClassLoader().getResourceAsStream("E:\\1.pcm"); String filepath = "E:\\1.pcm"; File file = new File(filepath); fis = new FileInputStream(file); System.out.println(fis); } catch (Exception e) { e.printStackTrace(); } if (fis != null) { System.out.println("create NLS future"); try { NlsRequest req = new NlsRequest(); req.setAppKey("nls-service"); // appkey请从 "快速开始" 帮助页面的appkey列表中获取 req.setAsrFormat("pcm"); // 设置语音文件格式为pcm,我们支持16k 16bit 的无头的pcm文件。 /* 热词相关配置 */ // req.setAsrVocabularyId("热词词表id");//热词词表id /* 热词相关配置 */ req.authorize("ak", "as"); // 请替换为用户申请到的Access // Key ID和Access // Key // SecretNlsFuture future = client.createNlsFuture(req, this); // 实例化请求,传入请求和监听器 System.out.println("call NLS service"); byte[] b = new byte[8000]; int len = 0; while ((len = fis.read(b)) > 0) { future.sendVoice(b, 0, len); // 发送语音数据 Thread.sleep(50);// } future.sendFinishSignal(); // 语音识别结束时,发送结束符 System.out.println("main thread enter waiting for less than 10s."); future.await(10000); // 设置服务端结果返回的超时时间 } catch (Exception e) { e.printStackTrace(); } System.out.println("calling NLS service end"); } } public void shutDown() { 
System.out.println("close NLS client"); // 关闭客户端并释放资源 client.close(); System.out.println("demo done"); } @Override public void onMessageReceived(NlsEvent e) { // 识别结果的回调 NlsResponse response = e.getResponse(); String result = "";// 先初始化, int statusCode = response.getStatus_code(); if (response.getAsr_ret() != null) { System.out.println("11111111111111111111111"); result += "\nget asr result: statusCode=[" + statusCode + "], " + response.getAsr_ret(); } if (result != null) { System.out.println("2222222222222222222222"); System.out.println(result); System.out.println("33333333333333333333333"); } else { System.out.println(response.jsonResults.toString()); } } @Override public void onOperationFailed(NlsEvent e) { // 识别失败的回调 String result = ""; result += "on operation failed: statusCode=[" + e.getResponse().getStatus_code() + "], " + e.getErrorMessage(); System.out.println(result); } @Override public void onChannelClosed(NlsEvent e) { // socket 连接关闭的回调 System.out.println("on websocket closed."); } public static void main(String[] args) { String akId = "ak";//自己的秘钥 String akSecret = "as"; AsrDemo asrDemo = new AsrDemo(akId, akSecret); asrDemo.startAsr(); asrDemo.shutDown(); }}
百度Ai智能语音识别
/** * */package baidu.ocr.aip;import java.io.BufferedReader;import java.io.DataOutputStream;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.UnsupportedEncodingException;import java.net.HttpURLConnection;import java.net.URL;import java.net.URLDecoder;import java.net.URLEncoder;import javax.xml.bind.DatatypeConverter;import org.json.JSONObject;/** * @author Administrator * */public class speech { private static final String serverURL = "http://vop.baidu.com/server_api"; private static String token = ""; private static final String testFileName = "E:\\1.pcm"; // 百度语音提供技术支持 // put your own params here // 下面3个值要填写自己申请的app对应的值 private static final String apiKey = ""; private static final String secretKey = ""; private static final String cuid = "84-EF-18-C7-1F-AF";//查看本机的mac值,物理地址,在命令栏查看ip地址的时候就能看到. public static void main(String[] args) throws Exception { getToken(); method1(); // method2(); } private static void getToken() throws Exception { String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials" + "&client_id=" + apiKey + "&client_secret=" + secretKey; HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection(); token = new JSONObject(printResponse(conn)).getString("access_token"); } private static void method1() throws Exception { File pcmFile = new File(testFileName); HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection(); // construct params JSONObject params = new JSONObject(); params.put("format", "pcm");//文件格式 params.put("rate", 16000);//可以更改采样率 params.put("channel", "1"); params.put("token", token); params.put("lan", "zh"); params.put("cuid", cuid); params.put("len", pcmFile.length()); params.put("speech", DatatypeConverter.printBase64Binary(loadFile(pcmFile))); // add request header conn.setRequestMethod("POST"); conn.setRequestProperty("Content-Type", "application/json; 
charset=utf-8"); conn.setDoInput(true); conn.setDoOutput(true); // send request DataOutputStream wr = new DataOutputStream(conn.getOutputStream()); wr.writeBytes(params.toString()); wr.flush(); wr.close(); printResponse(conn); } private static void method2() throws Exception { File pcmFile = new File(testFileName); HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + "?cuid=" + cuid + "&token=" + token) .openConnection(); // add request header conn.setRequestMethod("POST"); conn.setRequestProperty("Content-Type", "audio/pcm; rate=8000"); conn.setDoInput(true); conn.setDoOutput(true); // send request DataOutputStream wr = new DataOutputStream(conn.getOutputStream()); wr.write(loadFile(pcmFile)); wr.flush(); wr.close(); System.out.println(getUtf8String(printResponse(conn))); } private static String printResponse(HttpURLConnection conn) throws Exception { if (conn.getResponseCode() != 200) { // request error System.out.println("conn.getResponseCode() = " + conn.getResponseCode()); return ""; } InputStream is = conn.getInputStream(); BufferedReader rd = new BufferedReader(new InputStreamReader(is)); String line; StringBuffer response = new StringBuffer(); while ((line = rd.readLine()) != null) { response.append(line); response.append('\r'); } rd.close(); System.out.println(new JSONObject(response.toString()).toString(4)); return response.toString(); } private static byte[] loadFile(File file) throws IOException { InputStream is = new FileInputStream(file); long length = file.length(); byte[] bytes = new byte[(int) length]; int offset = 0; int numRead = 0; while (offset < bytes.length && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) { offset += numRead; } if (offset < bytes.length) { is.close(); throw new IOException("Could not completely read file " + file.getName()); } is.close(); return bytes; } // GBK编码转为UTF-8 private static String getUtf8String(String s) throws UnsupportedEncodingException { StringBuffer sb = new StringBuffer(); 
sb.append(s); String xmlString = ""; String xmlUtf8 = ""; xmlString = new String(sb.toString().getBytes("GBK")); xmlUtf8 = URLEncoder.encode(xmlString, "GBK"); return URLDecoder.decode(xmlUtf8, "UTF-8"); }}
讯飞Sdk很完整,拿过来就能直接用
.
测试结果:
1.采用了16k的pcm无压缩的文件.
2.
3
综合来看.
三个平台对基本的语音识别都有较好的辨识度,识别率可以接受并可以商用.
好像阿里和百度小批量均免费;讯飞有时长限制,企业版要收费.但是讯飞识别效果最佳,加入了标点和语气的分析,更人性化.识别效果综合感觉讯飞>百度>阿里,具体使用哪种还需要看公司的选择了.
如有不足,欢迎批评指正.
阅读全文
0 0
- java版阿里云,百度ai,讯飞语音识别效果简单对比及demo
- 讯飞语音Demo—java版
- 讯飞语音识别Demo简洁版,语音识别、文字转语音、按压识别
- 科大讯飞语音识别 demo
- 讯飞语音识别最简Demo实现(1)
- 讯飞语音识别
- 讯飞语音识别
- iOS开发-讯飞语音识别SDK简单实用
- 讯飞语音听写Demo
- 语音听写与合成--(讯飞语音识别与合成&&百度语音识别)
- 百度云ai-车型识别
- 使用Java的讯飞语音识别示例
- 讯飞语音命令词离线识别,离线听写demo修改
- 【Demo】iOS平台上的讯飞语音识别语音合成开发
- android之基于百度语音合讯飞语音识别的语音交互
- Unity接入讯飞语音识别___Android版
- 百度语音识别Demo -- 简单应用
- 阿里 旷视 百度 OCR图片文字识别功能对比
- 简单的逆波兰式 c语言
- 查看mysql和rabbitmq的log
- QT中文输入法状态下获取键盘输入事件
- DFS深度优先算法
- Permission denied: user=root, access=WRITE, inode="/user":hdfs:supergroup:drwxr-xr-x解决办法
- java版阿里云,百度ai,讯飞语音识别效果简单对比及demo
- xcode9报 Implicit declaration of function 'require' is invalid in c99 的错误
- 登录表单JS静态验证
- Source Insight 代码自动排版
- 最长公共子串
- SQL Server 生成时间序列
- 【矩阵快速幂】Recurrences UVA
- 移动开发之安全学习
- 实验四:用可重用的链表模块来实现命令行菜单小程序