java版阿里云,百度ai,讯飞语音识别效果简单对比及demo

来源:互联网 发布:mac os内存清理命令 编辑:程序博客网 时间:2024/04/27 14:21

因公司业务需要,我分别调用了三家平台的语音识别接口(Java版短语音识别)并做了简单对比,现把测试结果和主要代码块贴出来,供有需要的人参考使用.

阿里云的一句话识别:

package com.alibaba.idst.nls.demo;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import com.alibaba.idst.nls.NlsClient;
import com.alibaba.idst.nls.NlsFuture;
import com.alibaba.idst.nls.event.NlsEvent;
import com.alibaba.idst.nls.event.NlsListener;
import com.alibaba.idst.nls.protocol.NlsRequest;
import com.alibaba.idst.nls.protocol.NlsResponse;

/**
 * Minimal demo of Alibaba Cloud NLS short-sentence speech recognition.
 *
 * <p>Reads a local PCM file, sends it to the service in chunks through an
 * {@link NlsFuture}, and prints whatever the service reports back through the
 * {@link NlsListener} callbacks implemented by this class.
 */
public class AsrDemo implements NlsListener {

    /** Local audio file to recognize. */
    private static final String AUDIO_FILE_PATH = "E:\\1.pcm";

    /** Number of bytes read from the file and sent per {@code sendVoice} call. */
    private static final int CHUNK_SIZE = 8000;

    /** Pause between two chunks, in milliseconds. */
    private static final long CHUNK_INTERVAL_MS = 50;

    /** Value passed to {@code future.await(...)} after the last chunk was sent. */
    private static final int RESULT_TIMEOUT_MS = 10000;

    private static final NlsClient client = new NlsClient();

    private final String akId;
    private final String akSecret;

    /**
     * Stores the credentials and initializes the shared {@link NlsClient}.
     *
     * @param akId     Access Key ID used to authorize requests
     * @param akSecret Access Key Secret used to authorize requests
     */
    public AsrDemo(String akId, String akSecret) {
        System.out.println("init Nls client...");
        this.akId = akId;
        this.akSecret = akSecret;
        // Initialize the NlsClient.
        client.init();
    }

    /**
     * Opens the audio file and streams it to the recognition service.
     *
     * <p>Failures are printed to stderr and not propagated, as in the original demo.
     */
    public void startAsr() {
        System.out.println("open audio file...");
        // The official demo loads the audio via
        // getClass().getClassLoader().getResourceAsStream(...). That method expects a
        // classpath resource name and returns null for a filesystem path such as
        // "E:\\1.pcm", which is why the file is opened directly here.
        // try-with-resources closes the stream on every path (the original leaked it).
        try (InputStream fis = new FileInputStream(new File(AUDIO_FILE_PATH))) {
            System.out.println(fis);
            System.out.println("create NLS future");
            streamAudio(fis);
            System.out.println("calling NLS service end");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds the request, sends the audio in chunks and waits for the result. */
    private void streamAudio(InputStream fis) {
        try {
            NlsRequest req = new NlsRequest();
            // Get the appkey from the appkey list on the "Quick Start" help page.
            req.setAppKey("nls-service");
            // Audio format is pcm; per the vendor comment, headerless 16k/16bit PCM is supported.
            req.setAsrFormat("pcm");
            /* Hot-word configuration (optional) */
            // req.setAsrVocabularyId("hot-word vocabulary id");
            // Authorize with the credentials given to the constructor
            // (the original hard-coded "ak"/"as" here and ignored the fields).
            req.authorize(akId, akSecret);
            // Create the request future, passing the request and this listener.
            NlsFuture future = client.createNlsFuture(req, this);
            System.out.println("call NLS service");

            byte[] buffer = new byte[CHUNK_SIZE];
            int len;
            while ((len = fis.read(buffer)) > 0) {
                future.sendVoice(buffer, 0, len); // send audio data
                Thread.sleep(CHUNK_INTERVAL_MS);
            }
            // Send the end marker once all audio has been sent.
            future.sendFinishSignal();
            System.out.println("main thread enter waiting for less than 10s.");
            // Timeout for the server-side result.
            future.await(RESULT_TIMEOUT_MS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Closes the shared client and releases its resources. */
    public void shutDown() {
        System.out.println("close NLS client");
        client.close();
        System.out.println("demo done");
    }

    /** Callback for recognition results. */
    @Override
    public void onMessageReceived(NlsEvent e) {
        NlsResponse response = e.getResponse();
        String result = "";
        int statusCode = response.getStatus_code();
        if (response.getAsr_ret() != null) {
            System.out.println("11111111111111111111111");
            result += "\nget asr result: statusCode=[" + statusCode + "], " + response.getAsr_ret();
        }
        // The original tested "result != null", which is always true because result
        // starts as "", so the fallback below could never run.
        if (!result.isEmpty()) {
            System.out.println("2222222222222222222222");
            System.out.println(result);
            System.out.println("33333333333333333333333");
        } else {
            // No ASR text in this message: dump the raw JSON results instead.
            // println tolerates null, unlike calling toString() on the field.
            System.out.println(response.jsonResults);
        }
    }

    /** Callback for a failed recognition. */
    @Override
    public void onOperationFailed(NlsEvent e) {
        String result = "on operation failed: statusCode=[" + e.getResponse().getStatus_code() + "], "
                + e.getErrorMessage();
        System.out.println(result);
    }

    /** Callback for the socket connection being closed. */
    @Override
    public void onChannelClosed(NlsEvent e) {
        System.out.println("on websocket closed.");
    }

    public static void main(String[] args) {
        String akId = "ak"; // replace with your own Access Key ID
        String akSecret = "as"; // replace with your own Access Key Secret
        AsrDemo asrDemo = new AsrDemo(akId, akSecret);
        asrDemo.startAsr();
        asrDemo.shutDown();
    }
}

百度Ai智能语音识别

package baidu.ocr.aip;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;

import javax.xml.bind.DatatypeConverter;

import org.json.JSONObject;

/**
 * Minimal demo of the Baidu speech-recognition REST API.
 *
 * <p>Fetches an OAuth access token, then uploads a local PCM file either as a
 * JSON document with base64-encoded audio ({@code method1}) or as a raw audio
 * body ({@code method2}), and prints the server response.
 *
 * @author Administrator
 */
public class speech {

    private static final String serverURL = "http://vop.baidu.com/server_api";
    private static String token = "";
    private static final String testFileName = "E:\\1.pcm";

    /**
     * Sample rate of the test file in Hz. Shared by both upload methods so they
     * cannot disagree (the original sent 16000 in method1 but 8000 in method2).
     */
    private static final int SAMPLE_RATE = 16000;

    /** Charset used for the request body and for decoding responses. */
    private static final String UTF_8 = "UTF-8";

    // Put your own params here: the next two values must be filled in with the
    // values of the application you registered.
    private static final String apiKey = "";
    private static final String secretKey = "";
    // Client identifier sent as "cuid"; the author uses the machine's MAC address.
    private static final String cuid = "84-EF-18-C7-1F-AF";

    public static void main(String[] args) throws Exception {
        getToken();
        method1();
        // method2();
    }

    /**
     * Requests an access token with the client-credentials grant and stores it in {@code token}.
     *
     * @throws IOException if the token endpoint did not answer with HTTP 200
     */
    private static void getToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials"
                + "&client_id=" + apiKey + "&client_secret=" + secretKey;
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        String body = printResponse(conn);
        if (body.isEmpty()) {
            // printResponse returns "" on a non-200 status; fail with a clear message
            // instead of letting JSONObject choke on an empty string.
            throw new IOException("Failed to obtain access token; check apiKey/secretKey");
        }
        token = new JSONObject(body).getString("access_token");
    }

    /** Uploads the audio as a JSON document carrying the base64-encoded samples. */
    private static void method1() throws Exception {
        File pcmFile = new File(testFileName);
        // Load once so that "len" and "speech" are guaranteed to describe the same bytes.
        byte[] audio = loadFile(pcmFile);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();

        // construct params
        JSONObject params = new JSONObject();
        params.put("format", "pcm"); // audio file format
        params.put("rate", SAMPLE_RATE); // sample rate; adjust SAMPLE_RATE to match the file
        params.put("channel", "1");
        params.put("token", token);
        params.put("lan", "zh");
        params.put("cuid", cuid);
        params.put("len", audio.length);
        params.put("speech", DatatypeConverter.printBase64Binary(audio));

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request; encode explicitly because DataOutputStream.writeBytes drops
        // the high byte of every char.
        byte[] payload = params.toString().getBytes(UTF_8);
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(payload);
            wr.flush();
        }
        printResponse(conn);
    }

    /** Uploads the audio as a raw request body; cuid and token travel in the query string. */
    private static void method2() throws Exception {
        File pcmFile = new File(testFileName);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL + "?cuid=" + cuid + "&token=" + token)
                .openConnection();

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "audio/pcm; rate=" + SAMPLE_RATE);
        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request
        try (DataOutputStream wr = new DataOutputStream(conn.getOutputStream())) {
            wr.write(loadFile(pcmFile));
            wr.flush();
        }
        // printResponse already decodes the body as UTF-8, so the GBK round-trip
        // the original applied here is no longer needed (it would corrupt the text).
        System.out.println(printResponse(conn));
    }

    /**
     * Reads the response body, pretty-prints it as JSON and returns the raw text.
     *
     * @param conn connection whose response is read
     * @return the response body with lines joined by {@code '\r'}, or {@code ""}
     *         when the status code is not 200
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        int status = conn.getResponseCode();
        if (status != 200) {
            // request error
            System.out.println("conn.getResponseCode() = " + status);
            return "";
        }
        StringBuilder response = new StringBuilder();
        // Decode explicitly as UTF-8: the platform default charset (e.g. GBK on a
        // Chinese Windows machine) would garble non-ASCII text in the response.
        try (BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream(), UTF_8))) {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        }
        System.out.println(new JSONObject(response.toString()).toString(4));
        return response.toString();
    }

    /**
     * Reads a whole file into memory.
     *
     * @param file file to read
     * @return the file contents
     * @throws IOException if the file is too large for a byte array or could not be read completely
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File is too large to load into memory: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];
        // try-with-resources closes the stream even when read() throws.
        try (InputStream is = new FileInputStream(file)) {
            int offset = 0;
            int numRead;
            while (offset < bytes.length && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
        }
        return bytes;
    }

    /**
     * Legacy helper (originally labelled "convert GBK encoding to UTF-8"): re-encodes
     * the string as GBK bytes and reinterprets them through URL encoding as UTF-8.
     *
     * <p>NOTE(review): this looks like a repair for text that was decoded with the
     * wrong charset; it is no longer called now that responses are decoded as UTF-8.
     * Behavior is unchanged and still depends on the platform default charset.
     */
    private static String getUtf8String(String s) throws UnsupportedEncodingException {
        String xmlString = new String(s.getBytes("GBK"));
        String xmlUtf8 = URLEncoder.encode(xmlString, "GBK");
        return URLDecoder.decode(xmlUtf8, "UTF-8");
    }
}

讯飞Sdk很完整,拿过来就能直接用

.
测试结果:
1.采用了16k的pcm无压缩的文件.
测试1
2.测试2
3.测试3

综合来看.
三个平台对基本的语音识别都有较好的辨识度,识别率可以接受,并且可以商用.
阿里和百度在小批量调用时似乎都是免费的;讯飞有时长限制,企业版需要收费.但讯飞的识别效果最佳,加入了标点和语气的分析,更人性化.综合来看,识别效果为讯飞>百度>阿里,具体使用哪家还要看公司的选择.

如有不足,欢迎批评指正.

原创粉丝点击