baidu nlp api

来源:互联网 发布:火影忍者世界观知乎 编辑:程序博客网 时间:2024/05/20 05:07

上图为目录结构。gson 包需要自行在网上下载,也可以使用我提供的百度网盘地址。链接:http://pan.baidu.com/s/1miLVJr6 密码:8iex

package myce.casia.baidu.nlp;

import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

/**
 * Fetches an access token from the token endpoint
 * {@code https://aip.baidubce.com/oauth/2.0/token}.
 */
public class AuthService {

    /** Token endpoint; the query string is appended directly after the trailing '?'. */
    private static final String AUTH_HOST = "https://aip.baidubce.com/oauth/2.0/token?";

    // NOTE(review): these credentials are committed to source. Replace them with the
    // API Key / Secret Key of your own registered application, or call
    // getAuth(String, String) with values loaded from configuration instead.
    /** API Key obtained from the official site. */
    private static final String DEFAULT_CLIENT_ID = "gAZboZVt6BjVaU8z1TlQ2KGD";
    /** Secret Key obtained from the official site. */
    private static final String DEFAULT_CLIENT_SECRET = "meQDCz9LAndeq1gh4ZWdAMDxkDdbufnK";

    /**
     * Requests an access token using the built-in default credentials.
     *
     * @return the {@code access_token} field of the response, or {@code null} if the
     *         request or the parsing of the response failed. Example response:
     * {
     * "access_token": "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-9261443",
     * "session_key": "9mzdDxLPLXYfhlmFRBSGI8jOy19HNzFT6cle3RWf2m9HL8LdLxyVhEETkPS8gQtLbhw1TP/ly3+l59n4wknbmwPk6NC1",
     * "scope": "public vis-faceverify_faceverify vis-ocr_ocr vis-faceattribute_faceattribute vis-ocr_bankcard nlp_wordseg nlp_simnet nlp_wordemb nlp_comtag nlp_wordpos nlp_dnnlm_cn vis-antiporn_antiporn_v2 brain_ocr_scope wise_adapt lebo_resource_base lightservice_public hetu_basic lightcms_map_poi kaidian_kaidian wangrantest_test wangrantest_test1 bnstest_test1 bnstest_test2 ApsMisTest_Test权限 vis-classify_flower",
     * "refresh_token": "25.bf1ec1814779878486cb9ba68f7defe5.315360000.1804763545.282335-9261443",
     * "session_secret": "b292e183bb394fb8d8065e7f8b137757",
     * "expires_in": 2592000
     * }
     */
    public static String getAuth() {
        return getAuth(DEFAULT_CLIENT_ID, DEFAULT_CLIENT_SECRET);
    }

    /**
     * Requests an access token for the given application credentials.
     *
     * <p>Prints every response header and the raw response body to standard output.
     * Any failure (network error, non-JSON body, missing {@code access_token} field,
     * {@code null} argument) is reported on the console and turned into a
     * {@code null} return value rather than an exception.
     *
     * @param clientId     the application's API Key
     * @param clientSecret the application's Secret Key
     * @return the {@code access_token} value, or {@code null} on any failure
     */
    public static String getAuth(String clientId, String clientSecret) {
        HttpURLConnection connection = null;
        try {
            String getAccessTokenUrl = AUTH_HOST
                    // 1. grant_type is a fixed parameter
                    + "grant_type=client_credentials"
                    // 2. API Key; URL-encoded so arbitrary values cannot break the query string
                    + "&client_id=" + URLEncoder.encode(clientId, "UTF-8")
                    // 3. Secret Key
                    + "&client_secret=" + URLEncoder.encode(clientSecret, "UTF-8");

            // Open the connection to the URL
            connection = (HttpURLConnection) new URL(getAccessTokenUrl).openConnection();
            connection.setRequestMethod("GET");
            connection.connect();

            // Dump all response header fields
            Map<String, List<String>> headers = connection.getHeaderFields();
            for (Map.Entry<String, List<String>> header : headers.entrySet()) {
                System.out.println(header.getKey() + "--->" + header.getValue());
            }

            // Read the body. The reader is closed even if reading fails, and the bytes
            // are decoded as UTF-8 instead of the platform default charset.
            // NOTE(review): UTF-8 is assumed because the body is JSON - confirm if the
            // endpoint ever declares another charset.
            StringBuilder result = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }

            System.out.println("result:" + result);
            JSONObject jsonObject = new JSONObject(result.toString());
            return jsonObject.getString("access_token");
        } catch (Exception e) {
            System.out.printf("获取token失败!");
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return null;
    }

    public static void main(String[] args) {
        getAuth();
    }
    // the access-token is :24.3b1c18830d9173f045b326ef6141c23d.2592000.1497073914.282335-9632067
}

同理,下面的代码同样需要先通过 AuthService 生成 access token。

package myce.casia.nlp;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

import myce.casia.baidu.nlp.SimNet;
import myce.casia.db.ExamationAndKnowledge;
import myce.casia.db.ExamationAndKnowledgeDao;
import myce.casia.db.Knowledge;
import myce.casia.db.KnowledgeDao;

/**
 * Scores one exam question against every knowledge point returned by
 * {@link KnowledgeDao#findAll()} using {@link SimNet#TextSim(String, String)},
 * printing each score and appending it to {@code ./data/知识点试题相似度.txt}.
 */
public class EKSim {

    /** Underlying file writer for the result file; opened once when the class loads. */
    public static FileWriter fw;
    /** Buffered view over {@link #fw}; every result row is written through it. */
    public static BufferedWriter writer;

    static {
        try {
            File output = new File("./data/知识点试题相似度.txt");
            // Create ./data if it is missing. If creation fails, the FileWriter
            // constructor below reports the problem with a meaningful IOException.
            File parent = output.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            fw = new FileWriter(output);
            writer = new BufferedWriter(fw);
        } catch (IOException e) {
            // Previously the exception was only printed and class loading then died
            // with an unrelated NullPointerException from new BufferedWriter(null).
            // Fail with the real cause instead.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Runs the similarity computation for one sample question and closes the
     * result file afterwards.
     *
     * @param args unused
     * @throws IOException if writing or closing the result file fails
     */
    public static void main(String[] args) throws IOException {
        String content = "16公路上行驶的一辆汽车车牌为偶数的频率约是(  A.50%  B.100%  C.由各车所在单位或个人定   D.无法确定";
        try {
            Eksim(content);
        } finally {
            writer.close();
        }
    }

    /**
     * Computes the similarity between {@code content} and each knowledge point,
     * printing "name score" to standard output and writing the same row (terminated
     * by CRLF) to {@link #writer}. The writer is flushed after every row, so rows
     * already written survive a later failure.
     *
     * @param content the question text to compare against every knowledge point
     * @throws IOException if writing to the result file fails
     */
    public static void Eksim(String content) throws IOException {
        KnowledgeDao kDao = new KnowledgeDao();
        List<Knowledge> knowledges = kDao.findAll();
        for (Knowledge knowledge : knowledges) {
            // NOTE(review): the text compared is knowledge.toString(), while the label
            // printed is knowledge.getName() - confirm that toString() is the intended
            // comparison text.
            double d = SimNet.TextSim(content, knowledge.toString());
            String row = knowledge.getName().toString() + " " + d;
            System.out.println(row);
            writer.write(row + "\r\n");
            writer.flush();
        }
    }
}

这里将短文本相似度计算应用于试题与知识点的匹配,效果不是太好。输入的是试题,知识点已经预先存入数据库中,输出的是试题与每个知识点的相似度值。

package myce.casia.baidu.nlp;

import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.json.JSONObject;

import myce.casia.baidu.utils.GsonUtils;
import myce.casia.baidu.utils.HttpUtil;

/**
 * Short-text similarity: posts two texts to the simnet endpoint and returns the
 * {@code output.score} value of the response.
 */
public class SimNet {
    /**
     * Utility classes required by this code:
     * FileUtil, Base64Util, HttpUtil can be downloaded from
     * https://ai.baidu.com/file/BA73D199EED14D8AA5FC5A4BF4BDDA34
     * https://ai.baidu.com/file/C8D81F3301E24D2892968F09AE1AD6E2
     * https://ai.baidu.com/file/88C6E86FB5D141889391693FC84504B1
     */

    /** Short-text similarity endpoint. */
    private static final String SIM_NET_URL = "https://aip.baidubce.com/rpc/2.0/nlp/v1/simnet";

    /**
     * Token used by the two-argument {@link #TextSim(String, String)}.
     * In production the access token has an expiry time; clients may cache it and
     * must fetch a new one after it expires.
     */
    private static final String DEFAULT_ACCESS_TOKEN =
            "24.3b1c18830d9173f045b326ef6141c23d.2592000.1497073914.282335-9632067";

    public static void main(String[] args) {
        String shortText1 = "16公路上行驶的一辆汽车车牌为偶数的频率约是(  A.50%  B.100%  C.由各车所在单位或个人定   D.无法确定";
        String shortText2 = "二元一次方程组的求解";
        double d = TextSim(shortText1, shortText2);
        System.out.println(shortText1 + "与" + shortText2 + "这两个短文本的相似度是:" + d);
    }

    /**
     * Computes the similarity of two short texts using the built-in access token.
     *
     * @param shortText1 first text (sent as the {@code qslots} entry)
     * @param shortText2 second text (sent as the {@code tslots} entry)
     * @return the score reported by the service, or {@code 0.0} if the request or
     *         the parsing of the response failed
     */
    public static double TextSim(String shortText1, String shortText2) {
        return TextSim(shortText1, shortText2, DEFAULT_ACCESS_TOKEN);
    }

    /**
     * Computes the similarity of two short texts with an explicit access token.
     *
     * <p>Prints the raw response and the extracted score to standard output. Any
     * exception is printed and results in a return value of {@code 0.0}.
     *
     * @param shortText1  first text (sent as the {@code qslots} entry)
     * @param shortText2  second text (sent as the {@code tslots} entry)
     * @param accessToken access token passed through to {@code HttpUtil.post}
     * @return the score reported by the service, or {@code 0.0} on any failure
     */
    public static double TextSim(String shortText1, String shortText2, String accessToken) {
        double d = 0.0;
        try {
            Map<String, Object> input = new HashMap<String, Object>();
            input.put("qslots", buildSlots(shortText1));
            input.put("tslots", buildSlots(shortText2));
            input.put("type", 0);

            Map<String, Object> map = new HashMap<String, Object>();
            map.put("input", input);

            String params = GsonUtils.toJson(map);
            // NOTE(review): the JSON body is URL-encoded as GBK, unchanged from the
            // original code - confirm this is the encoding the endpoint expects.
            params = URLEncoder.encode(params, "GBK");

            String result = HttpUtil.post(SIM_NET_URL, accessToken, params);
            System.out.println(result);

            JSONObject output = new JSONObject(result).getJSONObject("output");
            System.out.println(output.get("score"));
            // getDouble converts any numeric value. The former (double) cast on get()
            // threw ClassCastException whenever the parser produced a non-Double
            // (e.g. an Integer for a score of 1), which was then swallowed as 0.0.
            d = output.getDouble("score");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return d;
    }

    /** Builds the single-element slot list that wraps one text for the request body. */
    private static List<Object> buildSlots(String text) {
        Map<String, Object> slot = new HashMap<String, Object>();
        slot.put("terms_sequence", text);
        slot.put("type", 0);
        slot.put("items", new ArrayList<Object>());

        List<Object> slots = new ArrayList<Object>();
        slots.add(slot);
        return slots;
    }
}

SimNet 是百度提供的面向开放领域的短文本相似度计算接口,因此在专业领域上的效果不好,而且是特别的不好。输入的是两个短文本,输出的是相似度值。

0 0
原创粉丝点击